40028 lines
1.0 MiB
40028 lines
1.0 MiB
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 3.0,
|
|
"eval_steps": 500,
|
|
"global_step": 5712,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0005252100840336134,
|
|
"grad_norm": 34.372692050718165,
|
|
"learning_rate": 0.0,
|
|
"loss": 4.102292060852051,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0010504201680672268,
|
|
"grad_norm": 36.51762099089081,
|
|
"learning_rate": 1.7482517482517483e-08,
|
|
"loss": 4.162827491760254,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0015756302521008404,
|
|
"grad_norm": 39.35112829697694,
|
|
"learning_rate": 3.4965034965034967e-08,
|
|
"loss": 4.137117385864258,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.0021008403361344537,
|
|
"grad_norm": 31.085406362496506,
|
|
"learning_rate": 5.244755244755245e-08,
|
|
"loss": 4.171526908874512,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.0026260504201680674,
|
|
"grad_norm": 36.90007791074726,
|
|
"learning_rate": 6.993006993006993e-08,
|
|
"loss": 4.477399826049805,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.0031512605042016808,
|
|
"grad_norm": 33.27901393600758,
|
|
"learning_rate": 8.741258741258742e-08,
|
|
"loss": 4.03668212890625,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.003676470588235294,
|
|
"grad_norm": 38.00002780928001,
|
|
"learning_rate": 1.048951048951049e-07,
|
|
"loss": 4.246670722961426,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.004201680672268907,
|
|
"grad_norm": 36.001513542837934,
|
|
"learning_rate": 1.223776223776224e-07,
|
|
"loss": 3.781754732131958,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.004726890756302521,
|
|
"grad_norm": 39.13792126575477,
|
|
"learning_rate": 1.3986013986013987e-07,
|
|
"loss": 4.13449764251709,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.005252100840336135,
|
|
"grad_norm": 34.251627318099416,
|
|
"learning_rate": 1.5734265734265737e-07,
|
|
"loss": 4.231945037841797,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.005777310924369748,
|
|
"grad_norm": 38.693008481778556,
|
|
"learning_rate": 1.7482517482517484e-07,
|
|
"loss": 4.490153789520264,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.0063025210084033615,
|
|
"grad_norm": 28.03685486294642,
|
|
"learning_rate": 1.9230769230769234e-07,
|
|
"loss": 3.567911148071289,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.006827731092436975,
|
|
"grad_norm": 32.79480844249344,
|
|
"learning_rate": 2.097902097902098e-07,
|
|
"loss": 3.733537435531616,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.007352941176470588,
|
|
"grad_norm": 47.860697756169074,
|
|
"learning_rate": 2.2727272727272729e-07,
|
|
"loss": 4.282741546630859,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.007878151260504201,
|
|
"grad_norm": 48.34040544579808,
|
|
"learning_rate": 2.447552447552448e-07,
|
|
"loss": 4.783939361572266,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.008403361344537815,
|
|
"grad_norm": 27.610574129516216,
|
|
"learning_rate": 2.622377622377623e-07,
|
|
"loss": 3.809292793273926,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.008928571428571428,
|
|
"grad_norm": 38.61921628311389,
|
|
"learning_rate": 2.7972027972027973e-07,
|
|
"loss": 4.540763854980469,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.009453781512605041,
|
|
"grad_norm": 38.36264754368808,
|
|
"learning_rate": 2.9720279720279723e-07,
|
|
"loss": 4.1531805992126465,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.009978991596638655,
|
|
"grad_norm": 31.561418642945256,
|
|
"learning_rate": 3.1468531468531473e-07,
|
|
"loss": 3.653745174407959,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.01050420168067227,
|
|
"grad_norm": 33.74062557341127,
|
|
"learning_rate": 3.321678321678322e-07,
|
|
"loss": 4.493264198303223,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.011029411764705883,
|
|
"grad_norm": 32.8043036933105,
|
|
"learning_rate": 3.496503496503497e-07,
|
|
"loss": 3.8892321586608887,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.011554621848739496,
|
|
"grad_norm": 40.810783301273766,
|
|
"learning_rate": 3.6713286713286713e-07,
|
|
"loss": 4.073638439178467,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.01207983193277311,
|
|
"grad_norm": 38.157265350961374,
|
|
"learning_rate": 3.846153846153847e-07,
|
|
"loss": 4.504125118255615,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.012605042016806723,
|
|
"grad_norm": 35.474059489035355,
|
|
"learning_rate": 4.020979020979021e-07,
|
|
"loss": 4.312219619750977,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.013130252100840336,
|
|
"grad_norm": 29.174644468083308,
|
|
"learning_rate": 4.195804195804196e-07,
|
|
"loss": 3.979979991912842,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.01365546218487395,
|
|
"grad_norm": 31.827136755689573,
|
|
"learning_rate": 4.3706293706293707e-07,
|
|
"loss": 4.110677242279053,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.014180672268907563,
|
|
"grad_norm": 38.844533989686525,
|
|
"learning_rate": 4.5454545454545457e-07,
|
|
"loss": 4.600620269775391,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.014705882352941176,
|
|
"grad_norm": 33.36776770479228,
|
|
"learning_rate": 4.72027972027972e-07,
|
|
"loss": 4.107484817504883,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.01523109243697479,
|
|
"grad_norm": 33.2123109875993,
|
|
"learning_rate": 4.895104895104896e-07,
|
|
"loss": 3.9041717052459717,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.015756302521008403,
|
|
"grad_norm": 31.29738620396506,
|
|
"learning_rate": 5.06993006993007e-07,
|
|
"loss": 4.2493743896484375,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.016281512605042018,
|
|
"grad_norm": 37.28210473967995,
|
|
"learning_rate": 5.244755244755246e-07,
|
|
"loss": 4.2800374031066895,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.01680672268907563,
|
|
"grad_norm": 30.510043067679604,
|
|
"learning_rate": 5.41958041958042e-07,
|
|
"loss": 4.002791881561279,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.017331932773109245,
|
|
"grad_norm": 26.80401075149564,
|
|
"learning_rate": 5.594405594405595e-07,
|
|
"loss": 3.7842516899108887,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.017857142857142856,
|
|
"grad_norm": 25.100534103543946,
|
|
"learning_rate": 5.76923076923077e-07,
|
|
"loss": 2.3941800594329834,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.01838235294117647,
|
|
"grad_norm": 31.241036694937726,
|
|
"learning_rate": 5.944055944055945e-07,
|
|
"loss": 4.037428379058838,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.018907563025210083,
|
|
"grad_norm": 25.681059449130732,
|
|
"learning_rate": 6.118881118881119e-07,
|
|
"loss": 3.7350282669067383,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.019432773109243698,
|
|
"grad_norm": 22.999454428628805,
|
|
"learning_rate": 6.293706293706295e-07,
|
|
"loss": 3.6307826042175293,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.01995798319327731,
|
|
"grad_norm": 25.210696458344774,
|
|
"learning_rate": 6.468531468531469e-07,
|
|
"loss": 3.746466636657715,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.020483193277310924,
|
|
"grad_norm": 30.80804783213676,
|
|
"learning_rate": 6.643356643356644e-07,
|
|
"loss": 3.945478677749634,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.02100840336134454,
|
|
"grad_norm": 23.155886619235893,
|
|
"learning_rate": 6.818181818181818e-07,
|
|
"loss": 3.9150636196136475,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.02153361344537815,
|
|
"grad_norm": 23.936510414650403,
|
|
"learning_rate": 6.993006993006994e-07,
|
|
"loss": 3.6661462783813477,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.022058823529411766,
|
|
"grad_norm": 38.55267469232255,
|
|
"learning_rate": 7.167832167832168e-07,
|
|
"loss": 4.267165184020996,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.022584033613445378,
|
|
"grad_norm": 18.85328597089255,
|
|
"learning_rate": 7.342657342657343e-07,
|
|
"loss": 3.355626106262207,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.023109243697478993,
|
|
"grad_norm": 19.375880502190483,
|
|
"learning_rate": 7.517482517482517e-07,
|
|
"loss": 3.7092792987823486,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.023634453781512604,
|
|
"grad_norm": 15.164586828853214,
|
|
"learning_rate": 7.692307692307694e-07,
|
|
"loss": 3.6890788078308105,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.02415966386554622,
|
|
"grad_norm": 15.346695121215316,
|
|
"learning_rate": 7.867132867132868e-07,
|
|
"loss": 2.885910987854004,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.02468487394957983,
|
|
"grad_norm": 15.946481065675485,
|
|
"learning_rate": 8.041958041958043e-07,
|
|
"loss": 3.008267879486084,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.025210084033613446,
|
|
"grad_norm": 21.26001098082826,
|
|
"learning_rate": 8.216783216783217e-07,
|
|
"loss": 3.832730770111084,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.025735294117647058,
|
|
"grad_norm": 14.434196417210957,
|
|
"learning_rate": 8.391608391608393e-07,
|
|
"loss": 3.1919186115264893,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.026260504201680673,
|
|
"grad_norm": 14.09245979625255,
|
|
"learning_rate": 8.566433566433567e-07,
|
|
"loss": 3.3092079162597656,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.026785714285714284,
|
|
"grad_norm": 14.105978255824217,
|
|
"learning_rate": 8.741258741258741e-07,
|
|
"loss": 3.7159504890441895,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.0273109243697479,
|
|
"grad_norm": 11.33389583742992,
|
|
"learning_rate": 8.916083916083917e-07,
|
|
"loss": 2.8575963973999023,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.027836134453781514,
|
|
"grad_norm": 13.7592294205824,
|
|
"learning_rate": 9.090909090909091e-07,
|
|
"loss": 3.6637725830078125,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.028361344537815126,
|
|
"grad_norm": 21.452492952094683,
|
|
"learning_rate": 9.265734265734266e-07,
|
|
"loss": 3.2712621688842773,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.02888655462184874,
|
|
"grad_norm": 18.171724171552793,
|
|
"learning_rate": 9.44055944055944e-07,
|
|
"loss": 4.053844451904297,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.029411764705882353,
|
|
"grad_norm": 12.70889446822849,
|
|
"learning_rate": 9.615384615384617e-07,
|
|
"loss": 3.6038215160369873,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.029936974789915968,
|
|
"grad_norm": 18.06293758360709,
|
|
"learning_rate": 9.790209790209791e-07,
|
|
"loss": 3.310293674468994,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.03046218487394958,
|
|
"grad_norm": 16.298729106431914,
|
|
"learning_rate": 9.965034965034966e-07,
|
|
"loss": 3.351804256439209,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.030987394957983194,
|
|
"grad_norm": 11.66177063101149,
|
|
"learning_rate": 1.013986013986014e-06,
|
|
"loss": 3.5310189723968506,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.031512605042016806,
|
|
"grad_norm": 16.79127622060395,
|
|
"learning_rate": 1.0314685314685317e-06,
|
|
"loss": 3.427992343902588,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.03203781512605042,
|
|
"grad_norm": 10.889976358660403,
|
|
"learning_rate": 1.0489510489510491e-06,
|
|
"loss": 3.3590340614318848,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.032563025210084036,
|
|
"grad_norm": 22.24062386613456,
|
|
"learning_rate": 1.0664335664335666e-06,
|
|
"loss": 3.339024305343628,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.03308823529411765,
|
|
"grad_norm": 14.853723489288882,
|
|
"learning_rate": 1.083916083916084e-06,
|
|
"loss": 3.7138495445251465,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.03361344537815126,
|
|
"grad_norm": 20.294398852176837,
|
|
"learning_rate": 1.1013986013986015e-06,
|
|
"loss": 4.287203311920166,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.03413865546218487,
|
|
"grad_norm": 15.44050443054915,
|
|
"learning_rate": 1.118881118881119e-06,
|
|
"loss": 3.9542369842529297,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.03466386554621849,
|
|
"grad_norm": 43.24480760624098,
|
|
"learning_rate": 1.1363636363636364e-06,
|
|
"loss": 4.417222023010254,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.0351890756302521,
|
|
"grad_norm": 16.74594067652709,
|
|
"learning_rate": 1.153846153846154e-06,
|
|
"loss": 3.835775375366211,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.03571428571428571,
|
|
"grad_norm": 16.264180303923414,
|
|
"learning_rate": 1.1713286713286715e-06,
|
|
"loss": 3.7545199394226074,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.03623949579831933,
|
|
"grad_norm": 12.139972439257933,
|
|
"learning_rate": 1.188811188811189e-06,
|
|
"loss": 4.071127891540527,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.03676470588235294,
|
|
"grad_norm": 24.150412848493595,
|
|
"learning_rate": 1.2062937062937064e-06,
|
|
"loss": 3.519155502319336,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.037289915966386554,
|
|
"grad_norm": 16.08963114427864,
|
|
"learning_rate": 1.2237762237762238e-06,
|
|
"loss": 3.74234938621521,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.037815126050420166,
|
|
"grad_norm": 20.34658014070812,
|
|
"learning_rate": 1.2412587412587413e-06,
|
|
"loss": 3.2490570545196533,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.038340336134453784,
|
|
"grad_norm": 11.333461679214565,
|
|
"learning_rate": 1.258741258741259e-06,
|
|
"loss": 3.378715991973877,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.038865546218487396,
|
|
"grad_norm": 15.421380175445977,
|
|
"learning_rate": 1.2762237762237764e-06,
|
|
"loss": 3.179351806640625,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.03939075630252101,
|
|
"grad_norm": 12.364609041568757,
|
|
"learning_rate": 1.2937062937062938e-06,
|
|
"loss": 3.696169853210449,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.03991596638655462,
|
|
"grad_norm": 14.797487208066967,
|
|
"learning_rate": 1.3111888111888113e-06,
|
|
"loss": 3.8278417587280273,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.04044117647058824,
|
|
"grad_norm": 19.575504410027442,
|
|
"learning_rate": 1.3286713286713287e-06,
|
|
"loss": 3.6845762729644775,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.04096638655462185,
|
|
"grad_norm": 9.795650741073091,
|
|
"learning_rate": 1.3461538461538462e-06,
|
|
"loss": 3.3823916912078857,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.04149159663865546,
|
|
"grad_norm": 10.62079090075366,
|
|
"learning_rate": 1.3636363636363636e-06,
|
|
"loss": 3.334550380706787,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.04201680672268908,
|
|
"grad_norm": 8.883324972999093,
|
|
"learning_rate": 1.381118881118881e-06,
|
|
"loss": 2.9630346298217773,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.04254201680672269,
|
|
"grad_norm": 16.176590194969382,
|
|
"learning_rate": 1.3986013986013987e-06,
|
|
"loss": 2.7338333129882812,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.0430672268907563,
|
|
"grad_norm": 20.83406849219658,
|
|
"learning_rate": 1.4160839160839162e-06,
|
|
"loss": 3.1813783645629883,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.043592436974789914,
|
|
"grad_norm": 10.425495266080052,
|
|
"learning_rate": 1.4335664335664336e-06,
|
|
"loss": 3.501983642578125,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.04411764705882353,
|
|
"grad_norm": 33.92403196912798,
|
|
"learning_rate": 1.451048951048951e-06,
|
|
"loss": 4.1713409423828125,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.044642857142857144,
|
|
"grad_norm": 14.58308011283638,
|
|
"learning_rate": 1.4685314685314685e-06,
|
|
"loss": 3.1751015186309814,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.045168067226890755,
|
|
"grad_norm": 9.735714288847259,
|
|
"learning_rate": 1.486013986013986e-06,
|
|
"loss": 2.9015283584594727,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.04569327731092437,
|
|
"grad_norm": 12.752318960038618,
|
|
"learning_rate": 1.5034965034965034e-06,
|
|
"loss": 3.1568045616149902,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.046218487394957986,
|
|
"grad_norm": 10.24072405792317,
|
|
"learning_rate": 1.5209790209790213e-06,
|
|
"loss": 3.683905601501465,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.0467436974789916,
|
|
"grad_norm": 16.137425682786226,
|
|
"learning_rate": 1.5384615384615387e-06,
|
|
"loss": 3.0892813205718994,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.04726890756302521,
|
|
"grad_norm": 8.408487827632596,
|
|
"learning_rate": 1.5559440559440562e-06,
|
|
"loss": 3.147287130355835,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.04779411764705882,
|
|
"grad_norm": 21.475842549976687,
|
|
"learning_rate": 1.5734265734265736e-06,
|
|
"loss": 3.7005114555358887,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.04831932773109244,
|
|
"grad_norm": 11.167540754791897,
|
|
"learning_rate": 1.590909090909091e-06,
|
|
"loss": 3.4915692806243896,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.04884453781512605,
|
|
"grad_norm": 13.65410221246126,
|
|
"learning_rate": 1.6083916083916085e-06,
|
|
"loss": 2.9103403091430664,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.04936974789915966,
|
|
"grad_norm": 15.532940581970639,
|
|
"learning_rate": 1.625874125874126e-06,
|
|
"loss": 2.892765522003174,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.04989495798319328,
|
|
"grad_norm": 9.5126247250127,
|
|
"learning_rate": 1.6433566433566434e-06,
|
|
"loss": 2.752000093460083,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.05042016806722689,
|
|
"grad_norm": 15.05522579035742,
|
|
"learning_rate": 1.660839160839161e-06,
|
|
"loss": 3.762812852859497,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.050945378151260504,
|
|
"grad_norm": 9.850059079966185,
|
|
"learning_rate": 1.6783216783216785e-06,
|
|
"loss": 3.203524589538574,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.051470588235294115,
|
|
"grad_norm": 11.003796272875793,
|
|
"learning_rate": 1.695804195804196e-06,
|
|
"loss": 2.939239025115967,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.051995798319327734,
|
|
"grad_norm": 7.308308991898506,
|
|
"learning_rate": 1.7132867132867134e-06,
|
|
"loss": 3.039363145828247,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.052521008403361345,
|
|
"grad_norm": 18.340776852855072,
|
|
"learning_rate": 1.7307692307692308e-06,
|
|
"loss": 3.8210678100585938,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.05304621848739496,
|
|
"grad_norm": 12.528552794960676,
|
|
"learning_rate": 1.7482517482517483e-06,
|
|
"loss": 2.9607348442077637,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.05357142857142857,
|
|
"grad_norm": 11.610455034799767,
|
|
"learning_rate": 1.7657342657342657e-06,
|
|
"loss": 3.047253370285034,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.05409663865546219,
|
|
"grad_norm": 8.650454729688116,
|
|
"learning_rate": 1.7832167832167834e-06,
|
|
"loss": 3.390791177749634,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.0546218487394958,
|
|
"grad_norm": 11.358288501544784,
|
|
"learning_rate": 1.8006993006993008e-06,
|
|
"loss": 2.8329169750213623,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.05514705882352941,
|
|
"grad_norm": 12.077596878633972,
|
|
"learning_rate": 1.8181818181818183e-06,
|
|
"loss": 3.383918285369873,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.05567226890756303,
|
|
"grad_norm": 16.712929429294753,
|
|
"learning_rate": 1.8356643356643357e-06,
|
|
"loss": 5.218741416931152,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.05619747899159664,
|
|
"grad_norm": 12.765028044884678,
|
|
"learning_rate": 1.8531468531468532e-06,
|
|
"loss": 3.01009202003479,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.05672268907563025,
|
|
"grad_norm": 7.58903376172291,
|
|
"learning_rate": 1.8706293706293706e-06,
|
|
"loss": 2.75844144821167,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.05724789915966386,
|
|
"grad_norm": 8.522158351054237,
|
|
"learning_rate": 1.888111888111888e-06,
|
|
"loss": 2.7749361991882324,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.05777310924369748,
|
|
"grad_norm": 8.608247143023526,
|
|
"learning_rate": 1.9055944055944055e-06,
|
|
"loss": 3.086836814880371,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.058298319327731093,
|
|
"grad_norm": 8.843532629640318,
|
|
"learning_rate": 1.9230769230769234e-06,
|
|
"loss": 3.258808135986328,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.058823529411764705,
|
|
"grad_norm": 14.132519139896296,
|
|
"learning_rate": 1.9405594405594406e-06,
|
|
"loss": 2.722972869873047,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.05934873949579832,
|
|
"grad_norm": 11.283796170078592,
|
|
"learning_rate": 1.9580419580419583e-06,
|
|
"loss": 3.538330316543579,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.059873949579831935,
|
|
"grad_norm": 8.882121571795718,
|
|
"learning_rate": 1.9755244755244755e-06,
|
|
"loss": 2.9473233222961426,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.06039915966386555,
|
|
"grad_norm": 10.461716733325476,
|
|
"learning_rate": 1.993006993006993e-06,
|
|
"loss": 3.2069764137268066,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.06092436974789916,
|
|
"grad_norm": 10.79135880503189,
|
|
"learning_rate": 2.0104895104895104e-06,
|
|
"loss": 2.776052951812744,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.06144957983193277,
|
|
"grad_norm": 12.852647194585002,
|
|
"learning_rate": 2.027972027972028e-06,
|
|
"loss": 3.164353370666504,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.06197478991596639,
|
|
"grad_norm": 20.595577508728553,
|
|
"learning_rate": 2.0454545454545457e-06,
|
|
"loss": 3.7819983959198,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.0625,
|
|
"grad_norm": 7.416532992444785,
|
|
"learning_rate": 2.0629370629370634e-06,
|
|
"loss": 3.207777500152588,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.06302521008403361,
|
|
"grad_norm": 8.288644250013293,
|
|
"learning_rate": 2.0804195804195806e-06,
|
|
"loss": 3.3697268962860107,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.06355042016806722,
|
|
"grad_norm": 11.053204236520791,
|
|
"learning_rate": 2.0979020979020983e-06,
|
|
"loss": 3.0271708965301514,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.06407563025210083,
|
|
"grad_norm": 10.881721316994767,
|
|
"learning_rate": 2.1153846153846155e-06,
|
|
"loss": 2.805438756942749,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.06460084033613446,
|
|
"grad_norm": 11.540998180504324,
|
|
"learning_rate": 2.132867132867133e-06,
|
|
"loss": 2.9184906482696533,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.06512605042016807,
|
|
"grad_norm": 9.557384208664955,
|
|
"learning_rate": 2.1503496503496504e-06,
|
|
"loss": 3.7422034740448,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.06565126050420168,
|
|
"grad_norm": 11.178002440365203,
|
|
"learning_rate": 2.167832167832168e-06,
|
|
"loss": 2.6965441703796387,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.0661764705882353,
|
|
"grad_norm": 7.619594789595347,
|
|
"learning_rate": 2.1853146853146857e-06,
|
|
"loss": 3.0583324432373047,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.0667016806722689,
|
|
"grad_norm": 9.387918036172614,
|
|
"learning_rate": 2.202797202797203e-06,
|
|
"loss": 2.6428847312927246,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.06722689075630252,
|
|
"grad_norm": 11.323075383472942,
|
|
"learning_rate": 2.2202797202797206e-06,
|
|
"loss": 3.23486328125,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.06775210084033613,
|
|
"grad_norm": 13.402078543796202,
|
|
"learning_rate": 2.237762237762238e-06,
|
|
"loss": 3.3365354537963867,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.06827731092436974,
|
|
"grad_norm": 11.308850890428076,
|
|
"learning_rate": 2.2552447552447555e-06,
|
|
"loss": 3.187561273574829,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.06880252100840337,
|
|
"grad_norm": 8.737315488014152,
|
|
"learning_rate": 2.2727272727272728e-06,
|
|
"loss": 2.4140090942382812,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.06932773109243698,
|
|
"grad_norm": 12.100833741328035,
|
|
"learning_rate": 2.2902097902097904e-06,
|
|
"loss": 3.1806931495666504,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.06985294117647059,
|
|
"grad_norm": 8.511670123183169,
|
|
"learning_rate": 2.307692307692308e-06,
|
|
"loss": 3.1618127822875977,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.0703781512605042,
|
|
"grad_norm": 16.89380862864223,
|
|
"learning_rate": 2.3251748251748253e-06,
|
|
"loss": 3.4971280097961426,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.07090336134453781,
|
|
"grad_norm": 9.189097737179944,
|
|
"learning_rate": 2.342657342657343e-06,
|
|
"loss": 2.7373313903808594,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.07142857142857142,
|
|
"grad_norm": 8.941802174535024,
|
|
"learning_rate": 2.36013986013986e-06,
|
|
"loss": 3.1305856704711914,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.07195378151260504,
|
|
"grad_norm": 6.820542963713525,
|
|
"learning_rate": 2.377622377622378e-06,
|
|
"loss": 2.563201904296875,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.07247899159663866,
|
|
"grad_norm": 11.611160703686382,
|
|
"learning_rate": 2.395104895104895e-06,
|
|
"loss": 2.9833860397338867,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.07300420168067227,
|
|
"grad_norm": 11.973193711874616,
|
|
"learning_rate": 2.4125874125874128e-06,
|
|
"loss": 3.0472545623779297,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.07352941176470588,
|
|
"grad_norm": 13.2367114956955,
|
|
"learning_rate": 2.43006993006993e-06,
|
|
"loss": 3.030299425125122,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.0740546218487395,
|
|
"grad_norm": 10.665734481419761,
|
|
"learning_rate": 2.4475524475524477e-06,
|
|
"loss": 3.1181631088256836,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.07457983193277311,
|
|
"grad_norm": 10.299897816995667,
|
|
"learning_rate": 2.4650349650349653e-06,
|
|
"loss": 3.3858418464660645,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.07510504201680672,
|
|
"grad_norm": 14.658268440587843,
|
|
"learning_rate": 2.4825174825174825e-06,
|
|
"loss": 3.0084147453308105,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.07563025210084033,
|
|
"grad_norm": 8.416572561906216,
|
|
"learning_rate": 2.5e-06,
|
|
"loss": 2.6434106826782227,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.07615546218487394,
|
|
"grad_norm": 12.773713450167529,
|
|
"learning_rate": 2.517482517482518e-06,
|
|
"loss": 3.660475730895996,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.07668067226890757,
|
|
"grad_norm": 13.643829409806205,
|
|
"learning_rate": 2.534965034965035e-06,
|
|
"loss": 2.801238775253296,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.07720588235294118,
|
|
"grad_norm": 9.936924513187812,
|
|
"learning_rate": 2.5524475524475528e-06,
|
|
"loss": 3.1104047298431396,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.07773109243697479,
|
|
"grad_norm": 9.006787964351338,
|
|
"learning_rate": 2.56993006993007e-06,
|
|
"loss": 2.8557915687561035,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.0782563025210084,
|
|
"grad_norm": 8.546555604210928,
|
|
"learning_rate": 2.5874125874125877e-06,
|
|
"loss": 2.5419888496398926,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.07878151260504201,
|
|
"grad_norm": 9.70164848054522,
|
|
"learning_rate": 2.604895104895105e-06,
|
|
"loss": 3.414214849472046,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.07930672268907563,
|
|
"grad_norm": 15.884940591453903,
|
|
"learning_rate": 2.6223776223776225e-06,
|
|
"loss": 2.6538589000701904,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.07983193277310924,
|
|
"grad_norm": 14.316305610479533,
|
|
"learning_rate": 2.63986013986014e-06,
|
|
"loss": 2.4650402069091797,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.08035714285714286,
|
|
"grad_norm": 11.143203112170086,
|
|
"learning_rate": 2.6573426573426574e-06,
|
|
"loss": 3.405933380126953,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.08088235294117647,
|
|
"grad_norm": 9.826512593789431,
|
|
"learning_rate": 2.674825174825175e-06,
|
|
"loss": 2.706834077835083,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.08140756302521009,
|
|
"grad_norm": 15.185136292845078,
|
|
"learning_rate": 2.6923076923076923e-06,
|
|
"loss": 3.490084409713745,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.0819327731092437,
|
|
"grad_norm": 9.493774180363053,
|
|
"learning_rate": 2.70979020979021e-06,
|
|
"loss": 2.6900107860565186,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.08245798319327731,
|
|
"grad_norm": 14.266301028366609,
|
|
"learning_rate": 2.7272727272727272e-06,
|
|
"loss": 2.7169108390808105,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.08298319327731092,
|
|
"grad_norm": 12.370813316919985,
|
|
"learning_rate": 2.744755244755245e-06,
|
|
"loss": 3.0527358055114746,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.08350840336134453,
|
|
"grad_norm": 11.574919073948696,
|
|
"learning_rate": 2.762237762237762e-06,
|
|
"loss": 2.889268636703491,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.08403361344537816,
|
|
"grad_norm": 8.362332310841456,
|
|
"learning_rate": 2.7797202797202798e-06,
|
|
"loss": 2.820096015930176,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.08455882352941177,
|
|
"grad_norm": 10.952826531084153,
|
|
"learning_rate": 2.7972027972027974e-06,
|
|
"loss": 2.741314172744751,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.08508403361344538,
|
|
"grad_norm": 12.96774516203996,
|
|
"learning_rate": 2.8146853146853147e-06,
|
|
"loss": 3.665605068206787,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.08560924369747899,
|
|
"grad_norm": 14.061227256256071,
|
|
"learning_rate": 2.8321678321678323e-06,
|
|
"loss": 3.036461353302002,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.0861344537815126,
|
|
"grad_norm": 12.386763761192109,
|
|
"learning_rate": 2.8496503496503496e-06,
|
|
"loss": 2.961890459060669,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.08665966386554622,
|
|
"grad_norm": 12.069018255605632,
|
|
"learning_rate": 2.8671328671328672e-06,
|
|
"loss": 2.5157463550567627,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.08718487394957983,
|
|
"grad_norm": 16.020284110145372,
|
|
"learning_rate": 2.8846153846153845e-06,
|
|
"loss": 2.6061654090881348,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.08771008403361344,
|
|
"grad_norm": 10.081405378228968,
|
|
"learning_rate": 2.902097902097902e-06,
|
|
"loss": 2.827906608581543,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.08823529411764706,
|
|
"grad_norm": 20.078073394145157,
|
|
"learning_rate": 2.91958041958042e-06,
|
|
"loss": 2.846405506134033,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.08876050420168068,
|
|
"grad_norm": 8.930183892960082,
|
|
"learning_rate": 2.937062937062937e-06,
|
|
"loss": 3.1270575523376465,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.08928571428571429,
|
|
"grad_norm": 8.646868809078281,
|
|
"learning_rate": 2.954545454545455e-06,
|
|
"loss": 2.7245287895202637,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.0898109243697479,
|
|
"grad_norm": 9.023992096062173,
|
|
"learning_rate": 2.972027972027972e-06,
|
|
"loss": 2.8285470008850098,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.09033613445378151,
|
|
"grad_norm": 9.408629636600987,
|
|
"learning_rate": 2.98951048951049e-06,
|
|
"loss": 3.208327054977417,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.09086134453781512,
|
|
"grad_norm": 15.435922685471567,
|
|
"learning_rate": 3.006993006993007e-06,
|
|
"loss": 3.037814140319824,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.09138655462184873,
|
|
"grad_norm": 11.863479816829765,
|
|
"learning_rate": 3.024475524475525e-06,
|
|
"loss": 3.15146541595459,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.09191176470588236,
|
|
"grad_norm": 19.641745609151776,
|
|
"learning_rate": 3.0419580419580425e-06,
|
|
"loss": 3.0058999061584473,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.09243697478991597,
|
|
"grad_norm": 9.524444165308065,
|
|
"learning_rate": 3.0594405594405598e-06,
|
|
"loss": 3.323092460632324,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.09296218487394958,
|
|
"grad_norm": 18.77864617215371,
|
|
"learning_rate": 3.0769230769230774e-06,
|
|
"loss": 2.5695178508758545,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.0934873949579832,
|
|
"grad_norm": 9.150670272159463,
|
|
"learning_rate": 3.0944055944055947e-06,
|
|
"loss": 2.732858419418335,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.0940126050420168,
|
|
"grad_norm": 15.584170556028567,
|
|
"learning_rate": 3.1118881118881123e-06,
|
|
"loss": 3.000278949737549,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.09453781512605042,
|
|
"grad_norm": 7.764882511162812,
|
|
"learning_rate": 3.1293706293706296e-06,
|
|
"loss": 2.537888288497925,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.09506302521008403,
|
|
"grad_norm": 12.640917564361425,
|
|
"learning_rate": 3.1468531468531472e-06,
|
|
"loss": 3.4139657020568848,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.09558823529411764,
|
|
"grad_norm": 13.736638173520092,
|
|
"learning_rate": 3.164335664335665e-06,
|
|
"loss": 3.2975075244903564,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.09611344537815127,
|
|
"grad_norm": 11.146515420942752,
|
|
"learning_rate": 3.181818181818182e-06,
|
|
"loss": 2.9420247077941895,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.09663865546218488,
|
|
"grad_norm": 16.077168756793455,
|
|
"learning_rate": 3.1993006993006998e-06,
|
|
"loss": 3.0430757999420166,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.09716386554621849,
|
|
"grad_norm": 7.818414764781988,
|
|
"learning_rate": 3.216783216783217e-06,
|
|
"loss": 2.572394371032715,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.0976890756302521,
|
|
"grad_norm": 11.284887261483664,
|
|
"learning_rate": 3.2342657342657347e-06,
|
|
"loss": 3.490865468978882,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.09821428571428571,
|
|
"grad_norm": 10.56509804328153,
|
|
"learning_rate": 3.251748251748252e-06,
|
|
"loss": 3.073840618133545,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.09873949579831932,
|
|
"grad_norm": 13.842893330036237,
|
|
"learning_rate": 3.2692307692307696e-06,
|
|
"loss": 2.848374843597412,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.09926470588235294,
|
|
"grad_norm": 12.97923818274776,
|
|
"learning_rate": 3.286713286713287e-06,
|
|
"loss": 2.7974178791046143,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.09978991596638656,
|
|
"grad_norm": 10.920365289443275,
|
|
"learning_rate": 3.3041958041958045e-06,
|
|
"loss": 3.00117826461792,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.10031512605042017,
|
|
"grad_norm": 8.240494221367149,
|
|
"learning_rate": 3.321678321678322e-06,
|
|
"loss": 3.0037038326263428,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.10084033613445378,
|
|
"grad_norm": 12.682655438957338,
|
|
"learning_rate": 3.3391608391608394e-06,
|
|
"loss": 2.929532527923584,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.1013655462184874,
|
|
"grad_norm": 13.824729489943875,
|
|
"learning_rate": 3.356643356643357e-06,
|
|
"loss": 3.097743272781372,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.10189075630252101,
|
|
"grad_norm": 6.984366164106969,
|
|
"learning_rate": 3.3741258741258742e-06,
|
|
"loss": 2.898709535598755,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.10241596638655462,
|
|
"grad_norm": 15.648955746370264,
|
|
"learning_rate": 3.391608391608392e-06,
|
|
"loss": 2.750286340713501,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.10294117647058823,
|
|
"grad_norm": 11.296236998793132,
|
|
"learning_rate": 3.409090909090909e-06,
|
|
"loss": 2.8797812461853027,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.10346638655462184,
|
|
"grad_norm": 12.78603394088546,
|
|
"learning_rate": 3.426573426573427e-06,
|
|
"loss": 2.301283121109009,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.10399159663865547,
|
|
"grad_norm": 10.171492942594694,
|
|
"learning_rate": 3.4440559440559445e-06,
|
|
"loss": 3.184098243713379,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.10451680672268908,
|
|
"grad_norm": 13.60666168480492,
|
|
"learning_rate": 3.4615384615384617e-06,
|
|
"loss": 2.9111146926879883,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.10504201680672269,
|
|
"grad_norm": 7.7514528457617695,
|
|
"learning_rate": 3.4790209790209793e-06,
|
|
"loss": 3.051267623901367,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.1055672268907563,
|
|
"grad_norm": 7.5968277415578145,
|
|
"learning_rate": 3.4965034965034966e-06,
|
|
"loss": 2.8756015300750732,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.10609243697478991,
|
|
"grad_norm": 7.276738165713097,
|
|
"learning_rate": 3.5139860139860142e-06,
|
|
"loss": 2.7797441482543945,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.10661764705882353,
|
|
"grad_norm": 15.493691798944,
|
|
"learning_rate": 3.5314685314685315e-06,
|
|
"loss": 3.124054431915283,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.10714285714285714,
|
|
"grad_norm": 21.992254056135568,
|
|
"learning_rate": 3.548951048951049e-06,
|
|
"loss": 2.4626293182373047,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.10766806722689076,
|
|
"grad_norm": 22.921388511775255,
|
|
"learning_rate": 3.566433566433567e-06,
|
|
"loss": 2.624375343322754,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.10819327731092437,
|
|
"grad_norm": 13.418579314372957,
|
|
"learning_rate": 3.583916083916084e-06,
|
|
"loss": 3.0480997562408447,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.10871848739495799,
|
|
"grad_norm": 18.140784399104454,
|
|
"learning_rate": 3.6013986013986017e-06,
|
|
"loss": 3.1774277687072754,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.1092436974789916,
|
|
"grad_norm": 23.10467055540296,
|
|
"learning_rate": 3.618881118881119e-06,
|
|
"loss": 2.5532002449035645,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.10976890756302521,
|
|
"grad_norm": 18.788373579277287,
|
|
"learning_rate": 3.6363636363636366e-06,
|
|
"loss": 2.434422731399536,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.11029411764705882,
|
|
"grad_norm": 11.075037647644848,
|
|
"learning_rate": 3.653846153846154e-06,
|
|
"loss": 3.3075475692749023,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.11081932773109243,
|
|
"grad_norm": 9.709952091780451,
|
|
"learning_rate": 3.6713286713286715e-06,
|
|
"loss": 3.0162198543548584,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.11134453781512606,
|
|
"grad_norm": 14.839852984942395,
|
|
"learning_rate": 3.6888111888111896e-06,
|
|
"loss": 2.989046096801758,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.11186974789915967,
|
|
"grad_norm": 9.615367564730676,
|
|
"learning_rate": 3.7062937062937064e-06,
|
|
"loss": 2.36433744430542,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.11239495798319328,
|
|
"grad_norm": 11.609479452472392,
|
|
"learning_rate": 3.7237762237762245e-06,
|
|
"loss": 2.972330093383789,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.11292016806722689,
|
|
"grad_norm": 14.618563522858688,
|
|
"learning_rate": 3.7412587412587413e-06,
|
|
"loss": 3.2336065769195557,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.1134453781512605,
|
|
"grad_norm": 11.569333653417587,
|
|
"learning_rate": 3.7587412587412593e-06,
|
|
"loss": 3.465144634246826,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.11397058823529412,
|
|
"grad_norm": 8.415895685524674,
|
|
"learning_rate": 3.776223776223776e-06,
|
|
"loss": 2.604027271270752,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.11449579831932773,
|
|
"grad_norm": 11.913951491593625,
|
|
"learning_rate": 3.7937062937062942e-06,
|
|
"loss": 3.1154234409332275,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.11502100840336134,
|
|
"grad_norm": 9.673968676254225,
|
|
"learning_rate": 3.811188811188811e-06,
|
|
"loss": 3.360666275024414,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.11554621848739496,
|
|
"grad_norm": 22.86078722612697,
|
|
"learning_rate": 3.828671328671329e-06,
|
|
"loss": 3.4900522232055664,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.11607142857142858,
|
|
"grad_norm": 13.062407442240385,
|
|
"learning_rate": 3.846153846153847e-06,
|
|
"loss": 2.4134976863861084,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.11659663865546219,
|
|
"grad_norm": 11.384408617132706,
|
|
"learning_rate": 3.863636363636364e-06,
|
|
"loss": 3.5764036178588867,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.1171218487394958,
|
|
"grad_norm": 12.438124931647444,
|
|
"learning_rate": 3.881118881118881e-06,
|
|
"loss": 3.0310349464416504,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.11764705882352941,
|
|
"grad_norm": 8.312284188439419,
|
|
"learning_rate": 3.898601398601399e-06,
|
|
"loss": 3.1959681510925293,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.11817226890756302,
|
|
"grad_norm": 11.60753443958062,
|
|
"learning_rate": 3.916083916083917e-06,
|
|
"loss": 3.0111706256866455,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.11869747899159663,
|
|
"grad_norm": 15.26549031847711,
|
|
"learning_rate": 3.933566433566433e-06,
|
|
"loss": 2.2159054279327393,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.11922268907563026,
|
|
"grad_norm": 12.052371940229559,
|
|
"learning_rate": 3.951048951048951e-06,
|
|
"loss": 2.9275083541870117,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.11974789915966387,
|
|
"grad_norm": 14.344014660662777,
|
|
"learning_rate": 3.968531468531469e-06,
|
|
"loss": 2.7885520458221436,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.12027310924369748,
|
|
"grad_norm": 10.749652811681903,
|
|
"learning_rate": 3.986013986013986e-06,
|
|
"loss": 3.0690507888793945,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.1207983193277311,
|
|
"grad_norm": 34.97507294561634,
|
|
"learning_rate": 4.003496503496504e-06,
|
|
"loss": 2.659562349319458,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.1213235294117647,
|
|
"grad_norm": 20.572400182307433,
|
|
"learning_rate": 4.020979020979021e-06,
|
|
"loss": 2.328030824661255,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.12184873949579832,
|
|
"grad_norm": 15.60933445684268,
|
|
"learning_rate": 4.0384615384615385e-06,
|
|
"loss": 2.6655068397521973,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.12237394957983193,
|
|
"grad_norm": 13.642415835616449,
|
|
"learning_rate": 4.055944055944056e-06,
|
|
"loss": 2.774475336074829,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.12289915966386554,
|
|
"grad_norm": 8.39833072721463,
|
|
"learning_rate": 4.073426573426574e-06,
|
|
"loss": 2.903592348098755,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.12342436974789917,
|
|
"grad_norm": 8.415681931951033,
|
|
"learning_rate": 4.0909090909090915e-06,
|
|
"loss": 3.264169216156006,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.12394957983193278,
|
|
"grad_norm": 19.358879649047967,
|
|
"learning_rate": 4.108391608391608e-06,
|
|
"loss": 2.8825795650482178,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.12447478991596639,
|
|
"grad_norm": 8.227022656346648,
|
|
"learning_rate": 4.125874125874127e-06,
|
|
"loss": 3.172744035720825,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.125,
|
|
"grad_norm": 11.583219450129397,
|
|
"learning_rate": 4.143356643356644e-06,
|
|
"loss": 2.7165675163269043,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.1255252100840336,
|
|
"grad_norm": 8.41588668294577,
|
|
"learning_rate": 4.160839160839161e-06,
|
|
"loss": 2.731034755706787,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.12605042016806722,
|
|
"grad_norm": 12.027666027582784,
|
|
"learning_rate": 4.178321678321678e-06,
|
|
"loss": 2.968233823776245,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.12657563025210083,
|
|
"grad_norm": 29.255592488924112,
|
|
"learning_rate": 4.195804195804197e-06,
|
|
"loss": 2.7731008529663086,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.12710084033613445,
|
|
"grad_norm": 7.723178232065691,
|
|
"learning_rate": 4.213286713286714e-06,
|
|
"loss": 3.0388343334198,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.12762605042016806,
|
|
"grad_norm": 16.25151188823703,
|
|
"learning_rate": 4.230769230769231e-06,
|
|
"loss": 3.1828207969665527,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.12815126050420167,
|
|
"grad_norm": 11.155763849543153,
|
|
"learning_rate": 4.248251748251749e-06,
|
|
"loss": 3.1027138233184814,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.12867647058823528,
|
|
"grad_norm": 18.878790527529304,
|
|
"learning_rate": 4.265734265734266e-06,
|
|
"loss": 3.6192004680633545,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.12920168067226892,
|
|
"grad_norm": 14.46355970963623,
|
|
"learning_rate": 4.283216783216784e-06,
|
|
"loss": 3.3602585792541504,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.12972689075630253,
|
|
"grad_norm": 7.185646733610239,
|
|
"learning_rate": 4.300699300699301e-06,
|
|
"loss": 3.087383270263672,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.13025210084033614,
|
|
"grad_norm": 9.554447790840326,
|
|
"learning_rate": 4.3181818181818185e-06,
|
|
"loss": 2.7826695442199707,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.13077731092436976,
|
|
"grad_norm": 13.301901608552498,
|
|
"learning_rate": 4.335664335664336e-06,
|
|
"loss": 3.091538906097412,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.13130252100840337,
|
|
"grad_norm": 12.24745264661296,
|
|
"learning_rate": 4.353146853146854e-06,
|
|
"loss": 2.380802631378174,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.13182773109243698,
|
|
"grad_norm": 11.933675211169204,
|
|
"learning_rate": 4.3706293706293715e-06,
|
|
"loss": 3.5029330253601074,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.1323529411764706,
|
|
"grad_norm": 6.36826707491301,
|
|
"learning_rate": 4.388111888111888e-06,
|
|
"loss": 2.601318120956421,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.1328781512605042,
|
|
"grad_norm": 7.8489978295167635,
|
|
"learning_rate": 4.405594405594406e-06,
|
|
"loss": 2.7776780128479004,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.1334033613445378,
|
|
"grad_norm": 16.97252676906109,
|
|
"learning_rate": 4.423076923076924e-06,
|
|
"loss": 3.3952476978302,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.13392857142857142,
|
|
"grad_norm": 11.673521836277425,
|
|
"learning_rate": 4.440559440559441e-06,
|
|
"loss": 2.972081184387207,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.13445378151260504,
|
|
"grad_norm": 9.06001648270617,
|
|
"learning_rate": 4.458041958041958e-06,
|
|
"loss": 2.2182092666625977,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.13497899159663865,
|
|
"grad_norm": 10.616408630136574,
|
|
"learning_rate": 4.475524475524476e-06,
|
|
"loss": 2.9145538806915283,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.13550420168067226,
|
|
"grad_norm": 8.129751834678931,
|
|
"learning_rate": 4.493006993006993e-06,
|
|
"loss": 2.7237493991851807,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.13602941176470587,
|
|
"grad_norm": 15.525937684631266,
|
|
"learning_rate": 4.510489510489511e-06,
|
|
"loss": 2.9480385780334473,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.13655462184873948,
|
|
"grad_norm": 9.085038297487555,
|
|
"learning_rate": 4.527972027972029e-06,
|
|
"loss": 3.32718825340271,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.13707983193277312,
|
|
"grad_norm": 12.237178714393849,
|
|
"learning_rate": 4.5454545454545455e-06,
|
|
"loss": 2.8504390716552734,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.13760504201680673,
|
|
"grad_norm": 26.459368666654875,
|
|
"learning_rate": 4.562937062937063e-06,
|
|
"loss": 3.4439854621887207,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.13813025210084034,
|
|
"grad_norm": 9.466463325357706,
|
|
"learning_rate": 4.580419580419581e-06,
|
|
"loss": 2.9045250415802,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.13865546218487396,
|
|
"grad_norm": 8.778972681585577,
|
|
"learning_rate": 4.5979020979020985e-06,
|
|
"loss": 2.701341390609741,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.13918067226890757,
|
|
"grad_norm": 10.683773330506448,
|
|
"learning_rate": 4.615384615384616e-06,
|
|
"loss": 2.8882217407226562,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.13970588235294118,
|
|
"grad_norm": 11.01283314975054,
|
|
"learning_rate": 4.632867132867133e-06,
|
|
"loss": 3.0006091594696045,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.1402310924369748,
|
|
"grad_norm": 10.175808650808271,
|
|
"learning_rate": 4.650349650349651e-06,
|
|
"loss": 2.9005932807922363,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.1407563025210084,
|
|
"grad_norm": 20.32774974068184,
|
|
"learning_rate": 4.667832167832168e-06,
|
|
"loss": 2.868788719177246,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.14128151260504201,
|
|
"grad_norm": 16.498875846940365,
|
|
"learning_rate": 4.685314685314686e-06,
|
|
"loss": 2.9905383586883545,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.14180672268907563,
|
|
"grad_norm": 11.147824941371098,
|
|
"learning_rate": 4.702797202797203e-06,
|
|
"loss": 2.5644679069519043,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.14233193277310924,
|
|
"grad_norm": 15.519193323150592,
|
|
"learning_rate": 4.72027972027972e-06,
|
|
"loss": 2.969034194946289,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.14285714285714285,
|
|
"grad_norm": 15.987808850113785,
|
|
"learning_rate": 4.737762237762238e-06,
|
|
"loss": 3.084165573120117,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.14338235294117646,
|
|
"grad_norm": 16.02073441890616,
|
|
"learning_rate": 4.755244755244756e-06,
|
|
"loss": 2.9546210765838623,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.14390756302521007,
|
|
"grad_norm": 12.82049236600706,
|
|
"learning_rate": 4.772727272727273e-06,
|
|
"loss": 2.4903135299682617,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.14443277310924368,
|
|
"grad_norm": 8.825327979578235,
|
|
"learning_rate": 4.79020979020979e-06,
|
|
"loss": 2.3993375301361084,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.14495798319327732,
|
|
"grad_norm": 11.433202975059613,
|
|
"learning_rate": 4.807692307692308e-06,
|
|
"loss": 3.0640833377838135,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.14548319327731093,
|
|
"grad_norm": 16.78525878625079,
|
|
"learning_rate": 4.8251748251748255e-06,
|
|
"loss": 2.915358543395996,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.14600840336134455,
|
|
"grad_norm": 12.297443057485832,
|
|
"learning_rate": 4.842657342657343e-06,
|
|
"loss": 2.8075063228607178,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.14653361344537816,
|
|
"grad_norm": 7.701076026693094,
|
|
"learning_rate": 4.86013986013986e-06,
|
|
"loss": 2.650007963180542,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.14705882352941177,
|
|
"grad_norm": 21.50963940806387,
|
|
"learning_rate": 4.877622377622378e-06,
|
|
"loss": 2.8522448539733887,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.14758403361344538,
|
|
"grad_norm": 17.474278566991046,
|
|
"learning_rate": 4.895104895104895e-06,
|
|
"loss": 2.9502627849578857,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.148109243697479,
|
|
"grad_norm": 9.91976809612693,
|
|
"learning_rate": 4.912587412587413e-06,
|
|
"loss": 3.335960626602173,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.1486344537815126,
|
|
"grad_norm": 9.918649814805438,
|
|
"learning_rate": 4.930069930069931e-06,
|
|
"loss": 2.692478895187378,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.14915966386554622,
|
|
"grad_norm": 11.143348082372727,
|
|
"learning_rate": 4.9475524475524474e-06,
|
|
"loss": 2.9984793663024902,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.14968487394957983,
|
|
"grad_norm": 10.73671912279009,
|
|
"learning_rate": 4.965034965034965e-06,
|
|
"loss": 3.2969272136688232,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.15021008403361344,
|
|
"grad_norm": 13.521630444285467,
|
|
"learning_rate": 4.982517482517483e-06,
|
|
"loss": 2.885775566101074,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.15073529411764705,
|
|
"grad_norm": 7.998114081414412,
|
|
"learning_rate": 5e-06,
|
|
"loss": 2.1184072494506836,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.15126050420168066,
|
|
"grad_norm": 13.94717668174975,
|
|
"learning_rate": 5.017482517482518e-06,
|
|
"loss": 3.6645607948303223,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.15178571428571427,
|
|
"grad_norm": 10.631771354405638,
|
|
"learning_rate": 5.034965034965036e-06,
|
|
"loss": 2.526690721511841,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.15231092436974789,
|
|
"grad_norm": 14.444464408724446,
|
|
"learning_rate": 5.0524475524475525e-06,
|
|
"loss": 3.0754122734069824,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.15283613445378152,
|
|
"grad_norm": 22.846153399067962,
|
|
"learning_rate": 5.06993006993007e-06,
|
|
"loss": 2.9716532230377197,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.15336134453781514,
|
|
"grad_norm": 10.441806847508389,
|
|
"learning_rate": 5.087412587412588e-06,
|
|
"loss": 3.054677724838257,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.15388655462184875,
|
|
"grad_norm": 11.57460744247853,
|
|
"learning_rate": 5.1048951048951055e-06,
|
|
"loss": 2.5806143283843994,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.15441176470588236,
|
|
"grad_norm": 8.620784213684933,
|
|
"learning_rate": 5.122377622377622e-06,
|
|
"loss": 3.0842535495758057,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.15493697478991597,
|
|
"grad_norm": 9.731529031655892,
|
|
"learning_rate": 5.13986013986014e-06,
|
|
"loss": 2.42826509475708,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.15546218487394958,
|
|
"grad_norm": 12.396404564185275,
|
|
"learning_rate": 5.157342657342658e-06,
|
|
"loss": 3.0564281940460205,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.1559873949579832,
|
|
"grad_norm": 14.222847351584731,
|
|
"learning_rate": 5.174825174825175e-06,
|
|
"loss": 2.490323543548584,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.1565126050420168,
|
|
"grad_norm": 9.084233904714603,
|
|
"learning_rate": 5.192307692307693e-06,
|
|
"loss": 2.894385814666748,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.15703781512605042,
|
|
"grad_norm": 10.192577795739174,
|
|
"learning_rate": 5.20979020979021e-06,
|
|
"loss": 2.8993654251098633,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.15756302521008403,
|
|
"grad_norm": 9.57220974681429,
|
|
"learning_rate": 5.2272727272727274e-06,
|
|
"loss": 2.6970958709716797,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.15808823529411764,
|
|
"grad_norm": 22.600556530562834,
|
|
"learning_rate": 5.244755244755245e-06,
|
|
"loss": 3.1567840576171875,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.15861344537815125,
|
|
"grad_norm": 23.030039586268348,
|
|
"learning_rate": 5.262237762237763e-06,
|
|
"loss": 3.4071342945098877,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.15913865546218486,
|
|
"grad_norm": 9.933167341415635,
|
|
"learning_rate": 5.27972027972028e-06,
|
|
"loss": 3.2700154781341553,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.15966386554621848,
|
|
"grad_norm": 8.272506082831457,
|
|
"learning_rate": 5.297202797202797e-06,
|
|
"loss": 2.4964592456817627,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.16018907563025211,
|
|
"grad_norm": 10.42727483479198,
|
|
"learning_rate": 5.314685314685315e-06,
|
|
"loss": 2.8914425373077393,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.16071428571428573,
|
|
"grad_norm": 10.080120194085032,
|
|
"learning_rate": 5.3321678321678325e-06,
|
|
"loss": 2.873271942138672,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.16123949579831934,
|
|
"grad_norm": 25.48866106385711,
|
|
"learning_rate": 5.34965034965035e-06,
|
|
"loss": 3.065826892852783,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.16176470588235295,
|
|
"grad_norm": 7.985604781040925,
|
|
"learning_rate": 5.367132867132867e-06,
|
|
"loss": 2.7347259521484375,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.16228991596638656,
|
|
"grad_norm": 22.85816677576059,
|
|
"learning_rate": 5.384615384615385e-06,
|
|
"loss": 3.3949899673461914,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.16281512605042017,
|
|
"grad_norm": 12.951795036113193,
|
|
"learning_rate": 5.402097902097902e-06,
|
|
"loss": 2.639277219772339,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.16334033613445378,
|
|
"grad_norm": 18.327832664050792,
|
|
"learning_rate": 5.41958041958042e-06,
|
|
"loss": 2.5833029747009277,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.1638655462184874,
|
|
"grad_norm": 12.24670204746658,
|
|
"learning_rate": 5.437062937062938e-06,
|
|
"loss": 2.796782970428467,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.164390756302521,
|
|
"grad_norm": 15.437957584113905,
|
|
"learning_rate": 5.4545454545454545e-06,
|
|
"loss": 3.127584934234619,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.16491596638655462,
|
|
"grad_norm": 16.523944603955105,
|
|
"learning_rate": 5.472027972027972e-06,
|
|
"loss": 2.613748550415039,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.16544117647058823,
|
|
"grad_norm": 17.531432028273453,
|
|
"learning_rate": 5.48951048951049e-06,
|
|
"loss": 2.914043664932251,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.16596638655462184,
|
|
"grad_norm": 8.873856320684599,
|
|
"learning_rate": 5.5069930069930074e-06,
|
|
"loss": 2.637112855911255,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.16649159663865545,
|
|
"grad_norm": 9.716690797904613,
|
|
"learning_rate": 5.524475524475524e-06,
|
|
"loss": 2.3453238010406494,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.16701680672268907,
|
|
"grad_norm": 13.335520358387795,
|
|
"learning_rate": 5.541958041958042e-06,
|
|
"loss": 2.8517067432403564,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.16754201680672268,
|
|
"grad_norm": 7.958293446281881,
|
|
"learning_rate": 5.5594405594405596e-06,
|
|
"loss": 2.308143377304077,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.16806722689075632,
|
|
"grad_norm": 13.536505294069386,
|
|
"learning_rate": 5.576923076923077e-06,
|
|
"loss": 2.496377944946289,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.16859243697478993,
|
|
"grad_norm": 11.556116676169184,
|
|
"learning_rate": 5.594405594405595e-06,
|
|
"loss": 2.844165802001953,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.16911764705882354,
|
|
"grad_norm": 38.4339348963778,
|
|
"learning_rate": 5.611888111888112e-06,
|
|
"loss": 3.34017276763916,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.16964285714285715,
|
|
"grad_norm": 8.81393612808593,
|
|
"learning_rate": 5.629370629370629e-06,
|
|
"loss": 2.8381388187408447,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.17016806722689076,
|
|
"grad_norm": 13.491449386435834,
|
|
"learning_rate": 5.646853146853147e-06,
|
|
"loss": 3.00146484375,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.17069327731092437,
|
|
"grad_norm": 10.56532718056163,
|
|
"learning_rate": 5.664335664335665e-06,
|
|
"loss": 3.2175936698913574,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.17121848739495799,
|
|
"grad_norm": 8.90381181305133,
|
|
"learning_rate": 5.681818181818183e-06,
|
|
"loss": 2.1831305027008057,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.1717436974789916,
|
|
"grad_norm": 13.833832379132224,
|
|
"learning_rate": 5.699300699300699e-06,
|
|
"loss": 2.598001003265381,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.1722689075630252,
|
|
"grad_norm": 13.284880697736416,
|
|
"learning_rate": 5.716783216783217e-06,
|
|
"loss": 2.8579132556915283,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.17279411764705882,
|
|
"grad_norm": 10.03274643755016,
|
|
"learning_rate": 5.7342657342657345e-06,
|
|
"loss": 2.6181423664093018,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.17331932773109243,
|
|
"grad_norm": 22.94440201330624,
|
|
"learning_rate": 5.751748251748253e-06,
|
|
"loss": 3.288753032684326,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.17384453781512604,
|
|
"grad_norm": 6.984083541878451,
|
|
"learning_rate": 5.769230769230769e-06,
|
|
"loss": 3.0615525245666504,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.17436974789915966,
|
|
"grad_norm": 10.044648077843894,
|
|
"learning_rate": 5.786713286713287e-06,
|
|
"loss": 2.8029568195343018,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.17489495798319327,
|
|
"grad_norm": 8.03248869711385,
|
|
"learning_rate": 5.804195804195804e-06,
|
|
"loss": 3.3441247940063477,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.17542016806722688,
|
|
"grad_norm": 17.576647360014295,
|
|
"learning_rate": 5.821678321678323e-06,
|
|
"loss": 2.7332277297973633,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.17594537815126052,
|
|
"grad_norm": 10.870948748653749,
|
|
"learning_rate": 5.83916083916084e-06,
|
|
"loss": 3.1329116821289062,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.17647058823529413,
|
|
"grad_norm": 9.547122044890056,
|
|
"learning_rate": 5.856643356643356e-06,
|
|
"loss": 2.8317198753356934,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.17699579831932774,
|
|
"grad_norm": 16.535666974764705,
|
|
"learning_rate": 5.874125874125874e-06,
|
|
"loss": 3.296326160430908,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.17752100840336135,
|
|
"grad_norm": 32.236437315703704,
|
|
"learning_rate": 5.8916083916083925e-06,
|
|
"loss": 2.118476629257202,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.17804621848739496,
|
|
"grad_norm": 11.630796483773931,
|
|
"learning_rate": 5.90909090909091e-06,
|
|
"loss": 3.121518611907959,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.17857142857142858,
|
|
"grad_norm": 19.388312080195448,
|
|
"learning_rate": 5.926573426573428e-06,
|
|
"loss": 3.1046595573425293,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.1790966386554622,
|
|
"grad_norm": 12.001962559970615,
|
|
"learning_rate": 5.944055944055944e-06,
|
|
"loss": 2.9630184173583984,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.1796218487394958,
|
|
"grad_norm": 16.359504215504014,
|
|
"learning_rate": 5.961538461538462e-06,
|
|
"loss": 3.542586326599121,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.1801470588235294,
|
|
"grad_norm": 15.97692453633404,
|
|
"learning_rate": 5.97902097902098e-06,
|
|
"loss": 2.631359577178955,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.18067226890756302,
|
|
"grad_norm": 10.896101566535322,
|
|
"learning_rate": 5.996503496503498e-06,
|
|
"loss": 2.8164381980895996,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.18119747899159663,
|
|
"grad_norm": 8.405088981188355,
|
|
"learning_rate": 6.013986013986014e-06,
|
|
"loss": 2.8848729133605957,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.18172268907563024,
|
|
"grad_norm": 12.527972452854746,
|
|
"learning_rate": 6.031468531468532e-06,
|
|
"loss": 3.0304975509643555,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.18224789915966386,
|
|
"grad_norm": 11.26596331715966,
|
|
"learning_rate": 6.04895104895105e-06,
|
|
"loss": 2.392552137374878,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.18277310924369747,
|
|
"grad_norm": 16.52357762076619,
|
|
"learning_rate": 6.0664335664335674e-06,
|
|
"loss": 2.7970218658447266,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.18329831932773108,
|
|
"grad_norm": 17.84614213512548,
|
|
"learning_rate": 6.083916083916085e-06,
|
|
"loss": 3.2228212356567383,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.18382352941176472,
|
|
"grad_norm": 11.699379344566392,
|
|
"learning_rate": 6.101398601398602e-06,
|
|
"loss": 3.0732061862945557,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.18434873949579833,
|
|
"grad_norm": 8.986265862248208,
|
|
"learning_rate": 6.1188811188811196e-06,
|
|
"loss": 2.5782558917999268,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.18487394957983194,
|
|
"grad_norm": 10.091949551695143,
|
|
"learning_rate": 6.136363636363637e-06,
|
|
"loss": 2.93255877494812,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.18539915966386555,
|
|
"grad_norm": 9.658375459178844,
|
|
"learning_rate": 6.153846153846155e-06,
|
|
"loss": 3.047292470932007,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.18592436974789917,
|
|
"grad_norm": 13.676797411232373,
|
|
"learning_rate": 6.171328671328672e-06,
|
|
"loss": 2.2338690757751465,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.18644957983193278,
|
|
"grad_norm": 15.01158558376865,
|
|
"learning_rate": 6.188811188811189e-06,
|
|
"loss": 1.2794667482376099,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.1869747899159664,
|
|
"grad_norm": 9.743805359222968,
|
|
"learning_rate": 6.206293706293707e-06,
|
|
"loss": 2.782721519470215,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.1875,
|
|
"grad_norm": 19.860933042719402,
|
|
"learning_rate": 6.223776223776225e-06,
|
|
"loss": 3.5968642234802246,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.1880252100840336,
|
|
"grad_norm": 11.718618315486523,
|
|
"learning_rate": 6.241258741258742e-06,
|
|
"loss": 3.20735502243042,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.18855042016806722,
|
|
"grad_norm": 17.40896852185293,
|
|
"learning_rate": 6.258741258741259e-06,
|
|
"loss": 2.6875576972961426,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.18907563025210083,
|
|
"grad_norm": 30.279753647692278,
|
|
"learning_rate": 6.276223776223777e-06,
|
|
"loss": 2.811734676361084,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.18960084033613445,
|
|
"grad_norm": 11.568365547294238,
|
|
"learning_rate": 6.2937062937062944e-06,
|
|
"loss": 2.677192449569702,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.19012605042016806,
|
|
"grad_norm": 7.457266358229516,
|
|
"learning_rate": 6.311188811188812e-06,
|
|
"loss": 3.1059458255767822,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.19065126050420167,
|
|
"grad_norm": 25.871852992276406,
|
|
"learning_rate": 6.32867132867133e-06,
|
|
"loss": 1.924442172050476,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.19117647058823528,
|
|
"grad_norm": 13.099023718893376,
|
|
"learning_rate": 6.3461538461538466e-06,
|
|
"loss": 2.608693838119507,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.19170168067226892,
|
|
"grad_norm": 19.954457552123216,
|
|
"learning_rate": 6.363636363636364e-06,
|
|
"loss": 3.336481809616089,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.19222689075630253,
|
|
"grad_norm": 10.593017470914477,
|
|
"learning_rate": 6.381118881118882e-06,
|
|
"loss": 2.4383559226989746,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.19275210084033614,
|
|
"grad_norm": 53.152245448473245,
|
|
"learning_rate": 6.3986013986013996e-06,
|
|
"loss": 3.030740261077881,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.19327731092436976,
|
|
"grad_norm": 20.630864189878373,
|
|
"learning_rate": 6.416083916083916e-06,
|
|
"loss": 3.375492572784424,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.19380252100840337,
|
|
"grad_norm": 20.40697417985276,
|
|
"learning_rate": 6.433566433566434e-06,
|
|
"loss": 3.3279261589050293,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.19432773109243698,
|
|
"grad_norm": 14.21414483251476,
|
|
"learning_rate": 6.451048951048952e-06,
|
|
"loss": 4.912796974182129,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.1948529411764706,
|
|
"grad_norm": 10.735114493440422,
|
|
"learning_rate": 6.468531468531469e-06,
|
|
"loss": 2.725327730178833,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.1953781512605042,
|
|
"grad_norm": 12.21193520568221,
|
|
"learning_rate": 6.486013986013987e-06,
|
|
"loss": 2.3871378898620605,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.1959033613445378,
|
|
"grad_norm": 12.19748977293293,
|
|
"learning_rate": 6.503496503496504e-06,
|
|
"loss": 3.147094249725342,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.19642857142857142,
|
|
"grad_norm": 12.112433407918623,
|
|
"learning_rate": 6.5209790209790215e-06,
|
|
"loss": 3.0235302448272705,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.19695378151260504,
|
|
"grad_norm": 8.68406243331884,
|
|
"learning_rate": 6.538461538461539e-06,
|
|
"loss": 2.0696754455566406,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.19747899159663865,
|
|
"grad_norm": 8.88435144089432,
|
|
"learning_rate": 6.555944055944057e-06,
|
|
"loss": 3.0018765926361084,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.19800420168067226,
|
|
"grad_norm": 10.335624452113418,
|
|
"learning_rate": 6.573426573426574e-06,
|
|
"loss": 2.522580146789551,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.19852941176470587,
|
|
"grad_norm": 9.176871587842054,
|
|
"learning_rate": 6.590909090909091e-06,
|
|
"loss": 2.918633460998535,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.19905462184873948,
|
|
"grad_norm": 13.695375443957785,
|
|
"learning_rate": 6.608391608391609e-06,
|
|
"loss": 2.945152759552002,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.19957983193277312,
|
|
"grad_norm": 26.971965437533072,
|
|
"learning_rate": 6.6258741258741266e-06,
|
|
"loss": 2.4536383152008057,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.20010504201680673,
|
|
"grad_norm": 10.702095768119317,
|
|
"learning_rate": 6.643356643356644e-06,
|
|
"loss": 2.701633930206299,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.20063025210084034,
|
|
"grad_norm": 11.714171516300004,
|
|
"learning_rate": 6.660839160839161e-06,
|
|
"loss": 3.37770414352417,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.20115546218487396,
|
|
"grad_norm": 12.122437886642187,
|
|
"learning_rate": 6.678321678321679e-06,
|
|
"loss": 2.713408946990967,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.20168067226890757,
|
|
"grad_norm": 6.4339691304640425,
|
|
"learning_rate": 6.695804195804196e-06,
|
|
"loss": 2.7212979793548584,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.20220588235294118,
|
|
"grad_norm": 14.561942681725727,
|
|
"learning_rate": 6.713286713286714e-06,
|
|
"loss": 2.2358717918395996,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.2027310924369748,
|
|
"grad_norm": 9.649089049655132,
|
|
"learning_rate": 6.730769230769232e-06,
|
|
"loss": 2.7439918518066406,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.2032563025210084,
|
|
"grad_norm": 8.951758220922223,
|
|
"learning_rate": 6.7482517482517485e-06,
|
|
"loss": 2.8917198181152344,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.20378151260504201,
|
|
"grad_norm": 12.727990149897483,
|
|
"learning_rate": 6.765734265734266e-06,
|
|
"loss": 2.8389973640441895,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.20430672268907563,
|
|
"grad_norm": 12.096894265468466,
|
|
"learning_rate": 6.783216783216784e-06,
|
|
"loss": 2.311751365661621,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.20483193277310924,
|
|
"grad_norm": 8.755646218028499,
|
|
"learning_rate": 6.8006993006993015e-06,
|
|
"loss": 2.6330480575561523,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.20535714285714285,
|
|
"grad_norm": 12.55791109622108,
|
|
"learning_rate": 6.818181818181818e-06,
|
|
"loss": 2.372936248779297,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.20588235294117646,
|
|
"grad_norm": 7.279471068593738,
|
|
"learning_rate": 6.835664335664336e-06,
|
|
"loss": 3.1617183685302734,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.20640756302521007,
|
|
"grad_norm": 11.020675003369632,
|
|
"learning_rate": 6.853146853146854e-06,
|
|
"loss": 3.1837525367736816,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.20693277310924368,
|
|
"grad_norm": 10.629050400933053,
|
|
"learning_rate": 6.870629370629371e-06,
|
|
"loss": 3.0233469009399414,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.20745798319327732,
|
|
"grad_norm": 24.269864804926,
|
|
"learning_rate": 6.888111888111889e-06,
|
|
"loss": 2.827796220779419,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.20798319327731093,
|
|
"grad_norm": 8.847343500765968,
|
|
"learning_rate": 6.905594405594406e-06,
|
|
"loss": 2.9433014392852783,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.20850840336134455,
|
|
"grad_norm": 12.290546828593667,
|
|
"learning_rate": 6.923076923076923e-06,
|
|
"loss": 3.1878905296325684,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.20903361344537816,
|
|
"grad_norm": 7.87874296353446,
|
|
"learning_rate": 6.940559440559441e-06,
|
|
"loss": 1.6629223823547363,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.20955882352941177,
|
|
"grad_norm": 18.84740177195252,
|
|
"learning_rate": 6.958041958041959e-06,
|
|
"loss": 2.9216864109039307,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.21008403361344538,
|
|
"grad_norm": 10.619109030802841,
|
|
"learning_rate": 6.975524475524476e-06,
|
|
"loss": 2.8075966835021973,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.210609243697479,
|
|
"grad_norm": 8.826150550203034,
|
|
"learning_rate": 6.993006993006993e-06,
|
|
"loss": 2.8049070835113525,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.2111344537815126,
|
|
"grad_norm": 13.199071344423656,
|
|
"learning_rate": 7.010489510489511e-06,
|
|
"loss": 3.451148748397827,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.21165966386554622,
|
|
"grad_norm": 43.12178745862726,
|
|
"learning_rate": 7.0279720279720285e-06,
|
|
"loss": 3.186109781265259,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.21218487394957983,
|
|
"grad_norm": 9.661154608505777,
|
|
"learning_rate": 7.045454545454546e-06,
|
|
"loss": 2.57002592086792,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.21271008403361344,
|
|
"grad_norm": 15.372456538441849,
|
|
"learning_rate": 7.062937062937063e-06,
|
|
"loss": 3.042940616607666,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.21323529411764705,
|
|
"grad_norm": 12.699673608642163,
|
|
"learning_rate": 7.080419580419581e-06,
|
|
"loss": 2.5307865142822266,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.21376050420168066,
|
|
"grad_norm": 58.7512320706578,
|
|
"learning_rate": 7.097902097902098e-06,
|
|
"loss": 2.579166889190674,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.21428571428571427,
|
|
"grad_norm": 7.817782683190833,
|
|
"learning_rate": 7.115384615384616e-06,
|
|
"loss": 2.7906084060668945,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.21481092436974789,
|
|
"grad_norm": 14.115688106613835,
|
|
"learning_rate": 7.132867132867134e-06,
|
|
"loss": 2.506618022918701,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.21533613445378152,
|
|
"grad_norm": 7.327806401537482,
|
|
"learning_rate": 7.15034965034965e-06,
|
|
"loss": 2.4115633964538574,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.21586134453781514,
|
|
"grad_norm": 19.26219369457865,
|
|
"learning_rate": 7.167832167832168e-06,
|
|
"loss": 3.5165960788726807,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.21638655462184875,
|
|
"grad_norm": 28.48386496666936,
|
|
"learning_rate": 7.185314685314686e-06,
|
|
"loss": 2.7558658123016357,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.21691176470588236,
|
|
"grad_norm": 11.447207055182961,
|
|
"learning_rate": 7.202797202797203e-06,
|
|
"loss": 2.8666818141937256,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.21743697478991597,
|
|
"grad_norm": 21.88243211842485,
|
|
"learning_rate": 7.22027972027972e-06,
|
|
"loss": 3.344409227371216,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.21796218487394958,
|
|
"grad_norm": 13.79881510025008,
|
|
"learning_rate": 7.237762237762238e-06,
|
|
"loss": 2.7095088958740234,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.2184873949579832,
|
|
"grad_norm": 13.143219789122687,
|
|
"learning_rate": 7.2552447552447555e-06,
|
|
"loss": 2.305788516998291,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.2190126050420168,
|
|
"grad_norm": 27.665957272489745,
|
|
"learning_rate": 7.272727272727273e-06,
|
|
"loss": 2.0047407150268555,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.21953781512605042,
|
|
"grad_norm": 9.680989528857488,
|
|
"learning_rate": 7.290209790209791e-06,
|
|
"loss": 2.2492542266845703,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.22006302521008403,
|
|
"grad_norm": 50.407251349513935,
|
|
"learning_rate": 7.307692307692308e-06,
|
|
"loss": 2.4409117698669434,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.22058823529411764,
|
|
"grad_norm": 12.795457760530025,
|
|
"learning_rate": 7.325174825174825e-06,
|
|
"loss": 2.942840576171875,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.22111344537815125,
|
|
"grad_norm": 14.993251079745352,
|
|
"learning_rate": 7.342657342657343e-06,
|
|
"loss": 2.966130256652832,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.22163865546218486,
|
|
"grad_norm": 12.05849461018198,
|
|
"learning_rate": 7.360139860139861e-06,
|
|
"loss": 3.0949718952178955,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.22216386554621848,
|
|
"grad_norm": 24.531642489999527,
|
|
"learning_rate": 7.377622377622379e-06,
|
|
"loss": 3.202450752258301,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.22268907563025211,
|
|
"grad_norm": 12.426345197431875,
|
|
"learning_rate": 7.395104895104895e-06,
|
|
"loss": 2.9883623123168945,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.22321428571428573,
|
|
"grad_norm": 8.65476120633608,
|
|
"learning_rate": 7.412587412587413e-06,
|
|
"loss": 2.8595736026763916,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.22373949579831934,
|
|
"grad_norm": 22.944717699417094,
|
|
"learning_rate": 7.43006993006993e-06,
|
|
"loss": 4.234559535980225,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.22426470588235295,
|
|
"grad_norm": 17.425585791825945,
|
|
"learning_rate": 7.447552447552449e-06,
|
|
"loss": 2.753218650817871,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.22478991596638656,
|
|
"grad_norm": 11.927897223766982,
|
|
"learning_rate": 7.465034965034965e-06,
|
|
"loss": 2.100989818572998,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.22531512605042017,
|
|
"grad_norm": 14.864287091266377,
|
|
"learning_rate": 7.4825174825174825e-06,
|
|
"loss": 3.3836236000061035,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.22584033613445378,
|
|
"grad_norm": 9.490344862736825,
|
|
"learning_rate": 7.500000000000001e-06,
|
|
"loss": 2.7179176807403564,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.2263655462184874,
|
|
"grad_norm": 9.573928430416439,
|
|
"learning_rate": 7.517482517482519e-06,
|
|
"loss": 2.606822967529297,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.226890756302521,
|
|
"grad_norm": 8.857455297135216,
|
|
"learning_rate": 7.534965034965036e-06,
|
|
"loss": 2.43107533454895,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.22741596638655462,
|
|
"grad_norm": 10.678051193590884,
|
|
"learning_rate": 7.552447552447552e-06,
|
|
"loss": 2.6220951080322266,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.22794117647058823,
|
|
"grad_norm": 13.919948243399672,
|
|
"learning_rate": 7.569930069930071e-06,
|
|
"loss": 2.6895902156829834,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.22846638655462184,
|
|
"grad_norm": 12.759389092274477,
|
|
"learning_rate": 7.5874125874125885e-06,
|
|
"loss": 2.776400566101074,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.22899159663865545,
|
|
"grad_norm": 14.99027393652492,
|
|
"learning_rate": 7.604895104895106e-06,
|
|
"loss": 3.0019478797912598,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.22951680672268907,
|
|
"grad_norm": 19.973323516321663,
|
|
"learning_rate": 7.622377622377622e-06,
|
|
"loss": 2.6804957389831543,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.23004201680672268,
|
|
"grad_norm": 15.91658106452273,
|
|
"learning_rate": 7.63986013986014e-06,
|
|
"loss": 3.0910873413085938,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.23056722689075632,
|
|
"grad_norm": 11.941879681685068,
|
|
"learning_rate": 7.657342657342658e-06,
|
|
"loss": 2.8254013061523438,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.23109243697478993,
|
|
"grad_norm": 16.67329539072613,
|
|
"learning_rate": 7.674825174825176e-06,
|
|
"loss": 2.724972724914551,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.23161764705882354,
|
|
"grad_norm": 12.006380920035427,
|
|
"learning_rate": 7.692307692307694e-06,
|
|
"loss": 1.9090602397918701,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.23214285714285715,
|
|
"grad_norm": 17.75374334716951,
|
|
"learning_rate": 7.70979020979021e-06,
|
|
"loss": 3.409651279449463,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.23266806722689076,
|
|
"grad_norm": 17.823273573885945,
|
|
"learning_rate": 7.727272727272727e-06,
|
|
"loss": 3.083376884460449,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.23319327731092437,
|
|
"grad_norm": 11.395891600147067,
|
|
"learning_rate": 7.744755244755245e-06,
|
|
"loss": 3.033438205718994,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.23371848739495799,
|
|
"grad_norm": 17.736568351871117,
|
|
"learning_rate": 7.762237762237763e-06,
|
|
"loss": 2.304586887359619,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.2342436974789916,
|
|
"grad_norm": 15.868522472650948,
|
|
"learning_rate": 7.77972027972028e-06,
|
|
"loss": 3.2195613384246826,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.2347689075630252,
|
|
"grad_norm": 7.286503781689302,
|
|
"learning_rate": 7.797202797202798e-06,
|
|
"loss": 2.5759634971618652,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.23529411764705882,
|
|
"grad_norm": 12.91141291704742,
|
|
"learning_rate": 7.814685314685316e-06,
|
|
"loss": 2.8108344078063965,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.23581932773109243,
|
|
"grad_norm": 14.991946303213734,
|
|
"learning_rate": 7.832167832167833e-06,
|
|
"loss": 3.3856277465820312,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.23634453781512604,
|
|
"grad_norm": 13.392793377874854,
|
|
"learning_rate": 7.84965034965035e-06,
|
|
"loss": 3.1772289276123047,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.23686974789915966,
|
|
"grad_norm": 28.1178428850119,
|
|
"learning_rate": 7.867132867132867e-06,
|
|
"loss": 2.233346939086914,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.23739495798319327,
|
|
"grad_norm": 12.912780095530756,
|
|
"learning_rate": 7.884615384615384e-06,
|
|
"loss": 3.240116596221924,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.23792016806722688,
|
|
"grad_norm": 16.063801823505663,
|
|
"learning_rate": 7.902097902097902e-06,
|
|
"loss": 3.0511634349823,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.23844537815126052,
|
|
"grad_norm": 20.678965505863175,
|
|
"learning_rate": 7.91958041958042e-06,
|
|
"loss": 2.8257339000701904,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.23897058823529413,
|
|
"grad_norm": 8.976117116341166,
|
|
"learning_rate": 7.937062937062937e-06,
|
|
"loss": 3.0524065494537354,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.23949579831932774,
|
|
"grad_norm": 10.251712933120457,
|
|
"learning_rate": 7.954545454545455e-06,
|
|
"loss": 3.0190975666046143,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.24002100840336135,
|
|
"grad_norm": 21.335298624149395,
|
|
"learning_rate": 7.972027972027973e-06,
|
|
"loss": 2.1252050399780273,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.24054621848739496,
|
|
"grad_norm": 7.27545971092921,
|
|
"learning_rate": 7.98951048951049e-06,
|
|
"loss": 2.513131618499756,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.24107142857142858,
|
|
"grad_norm": 12.600056652767337,
|
|
"learning_rate": 8.006993006993008e-06,
|
|
"loss": 2.483368396759033,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.2415966386554622,
|
|
"grad_norm": 6.832576475942737,
|
|
"learning_rate": 8.024475524475524e-06,
|
|
"loss": 3.0280661582946777,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.2421218487394958,
|
|
"grad_norm": 26.980936003585214,
|
|
"learning_rate": 8.041958041958042e-06,
|
|
"loss": 3.4621315002441406,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.2426470588235294,
|
|
"grad_norm": 7.878259025356279,
|
|
"learning_rate": 8.05944055944056e-06,
|
|
"loss": 2.8057236671447754,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.24317226890756302,
|
|
"grad_norm": 10.356915736855935,
|
|
"learning_rate": 8.076923076923077e-06,
|
|
"loss": 2.4050445556640625,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.24369747899159663,
|
|
"grad_norm": 7.750041232775024,
|
|
"learning_rate": 8.094405594405595e-06,
|
|
"loss": 2.925077199935913,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.24422268907563024,
|
|
"grad_norm": 11.688477607154828,
|
|
"learning_rate": 8.111888111888112e-06,
|
|
"loss": 3.2121710777282715,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.24474789915966386,
|
|
"grad_norm": 14.487448473659374,
|
|
"learning_rate": 8.12937062937063e-06,
|
|
"loss": 2.1468801498413086,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.24527310924369747,
|
|
"grad_norm": 23.42760042788643,
|
|
"learning_rate": 8.146853146853148e-06,
|
|
"loss": 3.138906478881836,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.24579831932773108,
|
|
"grad_norm": 26.956291607054943,
|
|
"learning_rate": 8.164335664335665e-06,
|
|
"loss": 2.754650592803955,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.24632352941176472,
|
|
"grad_norm": 12.804852697963751,
|
|
"learning_rate": 8.181818181818183e-06,
|
|
"loss": 2.7791335582733154,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.24684873949579833,
|
|
"grad_norm": 6.708018742172395,
|
|
"learning_rate": 8.199300699300699e-06,
|
|
"loss": 2.6262125968933105,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.24737394957983194,
|
|
"grad_norm": 9.374690323175574,
|
|
"learning_rate": 8.216783216783217e-06,
|
|
"loss": 2.1458685398101807,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.24789915966386555,
|
|
"grad_norm": 10.154124803652127,
|
|
"learning_rate": 8.234265734265734e-06,
|
|
"loss": 2.8066558837890625,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.24842436974789917,
|
|
"grad_norm": 8.731552323977164,
|
|
"learning_rate": 8.251748251748254e-06,
|
|
"loss": 2.9753003120422363,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.24894957983193278,
|
|
"grad_norm": 11.294678238925123,
|
|
"learning_rate": 8.26923076923077e-06,
|
|
"loss": 2.6883530616760254,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.2494747899159664,
|
|
"grad_norm": 11.075640338771539,
|
|
"learning_rate": 8.286713286713287e-06,
|
|
"loss": 3.029934883117676,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"grad_norm": 9.065132853953475,
|
|
"learning_rate": 8.304195804195805e-06,
|
|
"loss": 3.183340072631836,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.2505252100840336,
|
|
"grad_norm": 11.811314399111644,
|
|
"learning_rate": 8.321678321678323e-06,
|
|
"loss": 2.8612143993377686,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.2510504201680672,
|
|
"grad_norm": 13.831559056031068,
|
|
"learning_rate": 8.33916083916084e-06,
|
|
"loss": 2.720844268798828,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.25157563025210083,
|
|
"grad_norm": 18.044618112881952,
|
|
"learning_rate": 8.356643356643356e-06,
|
|
"loss": 3.130622625350952,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.25210084033613445,
|
|
"grad_norm": 10.808743157600965,
|
|
"learning_rate": 8.374125874125874e-06,
|
|
"loss": 2.920591354370117,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.25262605042016806,
|
|
"grad_norm": 14.286153466059856,
|
|
"learning_rate": 8.391608391608393e-06,
|
|
"loss": 2.8152403831481934,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.25315126050420167,
|
|
"grad_norm": 17.403115383352798,
|
|
"learning_rate": 8.40909090909091e-06,
|
|
"loss": 2.840240240097046,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.2536764705882353,
|
|
"grad_norm": 29.332509289729217,
|
|
"learning_rate": 8.426573426573428e-06,
|
|
"loss": 3.4587111473083496,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.2542016806722689,
|
|
"grad_norm": 10.967822109016453,
|
|
"learning_rate": 8.444055944055944e-06,
|
|
"loss": 3.0167973041534424,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.2547268907563025,
|
|
"grad_norm": 17.253531535564193,
|
|
"learning_rate": 8.461538461538462e-06,
|
|
"loss": 2.800180673599243,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.2552521008403361,
|
|
"grad_norm": 11.7873602705349,
|
|
"learning_rate": 8.47902097902098e-06,
|
|
"loss": 2.726072072982788,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.2557773109243697,
|
|
"grad_norm": 26.084283953713964,
|
|
"learning_rate": 8.496503496503497e-06,
|
|
"loss": 3.3081958293914795,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.25630252100840334,
|
|
"grad_norm": 23.353797537803327,
|
|
"learning_rate": 8.513986013986013e-06,
|
|
"loss": 2.4980058670043945,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.25682773109243695,
|
|
"grad_norm": 13.691435256067452,
|
|
"learning_rate": 8.531468531468533e-06,
|
|
"loss": 1.8919637203216553,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.25735294117647056,
|
|
"grad_norm": 11.418232071468879,
|
|
"learning_rate": 8.54895104895105e-06,
|
|
"loss": 2.665480613708496,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.25787815126050423,
|
|
"grad_norm": 16.665872215319176,
|
|
"learning_rate": 8.566433566433568e-06,
|
|
"loss": 2.935713052749634,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.25840336134453784,
|
|
"grad_norm": 25.55314521003453,
|
|
"learning_rate": 8.583916083916086e-06,
|
|
"loss": 2.6508588790893555,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.25892857142857145,
|
|
"grad_norm": 20.92869289645859,
|
|
"learning_rate": 8.601398601398602e-06,
|
|
"loss": 3.028712749481201,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.25945378151260506,
|
|
"grad_norm": 18.75020493469476,
|
|
"learning_rate": 8.61888111888112e-06,
|
|
"loss": 2.847996234893799,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.2599789915966387,
|
|
"grad_norm": 27.56630788936767,
|
|
"learning_rate": 8.636363636363637e-06,
|
|
"loss": 3.2749228477478027,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.2605042016806723,
|
|
"grad_norm": 13.69382630257854,
|
|
"learning_rate": 8.653846153846155e-06,
|
|
"loss": 3.1429078578948975,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.2610294117647059,
|
|
"grad_norm": 11.716274800000257,
|
|
"learning_rate": 8.671328671328672e-06,
|
|
"loss": 3.0947024822235107,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.2615546218487395,
|
|
"grad_norm": 18.28739725634593,
|
|
"learning_rate": 8.68881118881119e-06,
|
|
"loss": 2.3251190185546875,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.2620798319327731,
|
|
"grad_norm": 16.317437537387974,
|
|
"learning_rate": 8.706293706293708e-06,
|
|
"loss": 2.511931896209717,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.26260504201680673,
|
|
"grad_norm": 12.629147521616476,
|
|
"learning_rate": 8.723776223776225e-06,
|
|
"loss": 3.039483070373535,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.26313025210084034,
|
|
"grad_norm": 17.417276755487123,
|
|
"learning_rate": 8.741258741258743e-06,
|
|
"loss": 2.0832390785217285,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.26365546218487396,
|
|
"grad_norm": 11.661959144751236,
|
|
"learning_rate": 8.758741258741259e-06,
|
|
"loss": 2.5170040130615234,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.26418067226890757,
|
|
"grad_norm": 8.587572578481677,
|
|
"learning_rate": 8.776223776223777e-06,
|
|
"loss": 2.963388681411743,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.2647058823529412,
|
|
"grad_norm": 9.814489827235624,
|
|
"learning_rate": 8.793706293706294e-06,
|
|
"loss": 2.2981114387512207,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.2652310924369748,
|
|
"grad_norm": 6.895356060394732,
|
|
"learning_rate": 8.811188811188812e-06,
|
|
"loss": 2.869265079498291,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.2657563025210084,
|
|
"grad_norm": 18.872287384439865,
|
|
"learning_rate": 8.82867132867133e-06,
|
|
"loss": 2.2735471725463867,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.266281512605042,
|
|
"grad_norm": 12.14165400771466,
|
|
"learning_rate": 8.846153846153847e-06,
|
|
"loss": 2.5119335651397705,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.2668067226890756,
|
|
"grad_norm": 10.156856628838941,
|
|
"learning_rate": 8.863636363636365e-06,
|
|
"loss": 2.6723649501800537,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.26733193277310924,
|
|
"grad_norm": 19.460735524933785,
|
|
"learning_rate": 8.881118881118883e-06,
|
|
"loss": 2.925903797149658,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.26785714285714285,
|
|
"grad_norm": 17.74393460064321,
|
|
"learning_rate": 8.8986013986014e-06,
|
|
"loss": 2.6899666786193848,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.26838235294117646,
|
|
"grad_norm": 24.150033870893935,
|
|
"learning_rate": 8.916083916083916e-06,
|
|
"loss": 3.411919116973877,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.2689075630252101,
|
|
"grad_norm": 8.436504371655232,
|
|
"learning_rate": 8.933566433566434e-06,
|
|
"loss": 2.6116344928741455,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.2694327731092437,
|
|
"grad_norm": 9.718603320457653,
|
|
"learning_rate": 8.951048951048951e-06,
|
|
"loss": 2.750563859939575,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.2699579831932773,
|
|
"grad_norm": 8.577525339758392,
|
|
"learning_rate": 8.968531468531469e-06,
|
|
"loss": 2.9900131225585938,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.2704831932773109,
|
|
"grad_norm": 9.72020801019397,
|
|
"learning_rate": 8.986013986013987e-06,
|
|
"loss": 2.7598955631256104,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.2710084033613445,
|
|
"grad_norm": 18.02025057464598,
|
|
"learning_rate": 9.003496503496504e-06,
|
|
"loss": 2.7998476028442383,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.27153361344537813,
|
|
"grad_norm": 14.894906994176859,
|
|
"learning_rate": 9.020979020979022e-06,
|
|
"loss": 3.013887643814087,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.27205882352941174,
|
|
"grad_norm": 9.049787877783231,
|
|
"learning_rate": 9.03846153846154e-06,
|
|
"loss": 2.826267719268799,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.27258403361344535,
|
|
"grad_norm": 7.573154489562956,
|
|
"learning_rate": 9.055944055944057e-06,
|
|
"loss": 2.8274731636047363,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.27310924369747897,
|
|
"grad_norm": 21.913540817150594,
|
|
"learning_rate": 9.073426573426573e-06,
|
|
"loss": 3.278071641921997,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.27363445378151263,
|
|
"grad_norm": 15.180990763462034,
|
|
"learning_rate": 9.090909090909091e-06,
|
|
"loss": 2.5987932682037354,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.27415966386554624,
|
|
"grad_norm": 7.783671375514505,
|
|
"learning_rate": 9.108391608391609e-06,
|
|
"loss": 2.1832573413848877,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.27468487394957986,
|
|
"grad_norm": 10.721651287282956,
|
|
"learning_rate": 9.125874125874126e-06,
|
|
"loss": 3.219067096710205,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.27521008403361347,
|
|
"grad_norm": 13.116446727203488,
|
|
"learning_rate": 9.143356643356644e-06,
|
|
"loss": 2.9926705360412598,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.2757352941176471,
|
|
"grad_norm": 9.655360562971806,
|
|
"learning_rate": 9.160839160839162e-06,
|
|
"loss": 2.256592273712158,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.2762605042016807,
|
|
"grad_norm": 10.519913346333762,
|
|
"learning_rate": 9.17832167832168e-06,
|
|
"loss": 3.0461020469665527,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.2767857142857143,
|
|
"grad_norm": 13.293942326735147,
|
|
"learning_rate": 9.195804195804197e-06,
|
|
"loss": 3.2841827869415283,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.2773109243697479,
|
|
"grad_norm": 9.69975674353828,
|
|
"learning_rate": 9.213286713286715e-06,
|
|
"loss": 2.6983697414398193,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.2778361344537815,
|
|
"grad_norm": 13.908294823435138,
|
|
"learning_rate": 9.230769230769232e-06,
|
|
"loss": 2.669904947280884,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.27836134453781514,
|
|
"grad_norm": 15.020927144946038,
|
|
"learning_rate": 9.248251748251748e-06,
|
|
"loss": 2.8003804683685303,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.27888655462184875,
|
|
"grad_norm": 8.145015265202593,
|
|
"learning_rate": 9.265734265734266e-06,
|
|
"loss": 2.855621337890625,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.27941176470588236,
|
|
"grad_norm": 9.217520734527652,
|
|
"learning_rate": 9.283216783216784e-06,
|
|
"loss": 3.296477794647217,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.27993697478991597,
|
|
"grad_norm": 11.001708480571994,
|
|
"learning_rate": 9.300699300699301e-06,
|
|
"loss": 3.406341791152954,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.2804621848739496,
|
|
"grad_norm": 14.220941141028558,
|
|
"learning_rate": 9.318181818181819e-06,
|
|
"loss": 2.766058921813965,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.2809873949579832,
|
|
"grad_norm": 16.019050997199002,
|
|
"learning_rate": 9.335664335664337e-06,
|
|
"loss": 2.7180471420288086,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.2815126050420168,
|
|
"grad_norm": 13.57650452804989,
|
|
"learning_rate": 9.353146853146854e-06,
|
|
"loss": 2.658740997314453,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.2820378151260504,
|
|
"grad_norm": 12.403016551952248,
|
|
"learning_rate": 9.370629370629372e-06,
|
|
"loss": 3.016836166381836,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.28256302521008403,
|
|
"grad_norm": 12.52016091546998,
|
|
"learning_rate": 9.38811188811189e-06,
|
|
"loss": 2.647848129272461,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.28308823529411764,
|
|
"grad_norm": 9.35663561444136,
|
|
"learning_rate": 9.405594405594406e-06,
|
|
"loss": 3.000723361968994,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.28361344537815125,
|
|
"grad_norm": 6.754483303180034,
|
|
"learning_rate": 9.423076923076923e-06,
|
|
"loss": 2.221613883972168,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.28413865546218486,
|
|
"grad_norm": 10.081871721267813,
|
|
"learning_rate": 9.44055944055944e-06,
|
|
"loss": 2.757201910018921,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.2846638655462185,
|
|
"grad_norm": 8.432746811039069,
|
|
"learning_rate": 9.458041958041958e-06,
|
|
"loss": 2.7212443351745605,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.2851890756302521,
|
|
"grad_norm": 12.828062679779878,
|
|
"learning_rate": 9.475524475524476e-06,
|
|
"loss": 2.9237170219421387,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"grad_norm": 9.160119559898765,
|
|
"learning_rate": 9.493006993006994e-06,
|
|
"loss": 2.876392364501953,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.2862394957983193,
|
|
"grad_norm": 8.948718551105781,
|
|
"learning_rate": 9.510489510489511e-06,
|
|
"loss": 2.8873000144958496,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.2867647058823529,
|
|
"grad_norm": 16.11031612097912,
|
|
"learning_rate": 9.527972027972029e-06,
|
|
"loss": 2.6230111122131348,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.28728991596638653,
|
|
"grad_norm": 21.929482022240183,
|
|
"learning_rate": 9.545454545454547e-06,
|
|
"loss": 2.9438681602478027,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.28781512605042014,
|
|
"grad_norm": 12.426071712303534,
|
|
"learning_rate": 9.562937062937063e-06,
|
|
"loss": 2.5408456325531006,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.28834033613445376,
|
|
"grad_norm": 11.414531240521558,
|
|
"learning_rate": 9.58041958041958e-06,
|
|
"loss": 3.2351088523864746,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.28886554621848737,
|
|
"grad_norm": 9.40392398669366,
|
|
"learning_rate": 9.597902097902098e-06,
|
|
"loss": 2.4242911338806152,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.28939075630252103,
|
|
"grad_norm": 9.799732867736692,
|
|
"learning_rate": 9.615384615384616e-06,
|
|
"loss": 2.551866292953491,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.28991596638655465,
|
|
"grad_norm": 16.78383984737307,
|
|
"learning_rate": 9.632867132867133e-06,
|
|
"loss": 2.7000250816345215,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.29044117647058826,
|
|
"grad_norm": 10.934529013401667,
|
|
"learning_rate": 9.650349650349651e-06,
|
|
"loss": 2.7635700702667236,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.29096638655462187,
|
|
"grad_norm": 9.616026433955014,
|
|
"learning_rate": 9.667832167832169e-06,
|
|
"loss": 2.279999017715454,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.2914915966386555,
|
|
"grad_norm": 8.212243251536766,
|
|
"learning_rate": 9.685314685314686e-06,
|
|
"loss": 2.586347818374634,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.2920168067226891,
|
|
"grad_norm": 11.613308358122108,
|
|
"learning_rate": 9.702797202797204e-06,
|
|
"loss": 2.9352517127990723,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.2925420168067227,
|
|
"grad_norm": 16.131150380536646,
|
|
"learning_rate": 9.72027972027972e-06,
|
|
"loss": 2.757617235183716,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.2930672268907563,
|
|
"grad_norm": 11.79514279059609,
|
|
"learning_rate": 9.737762237762238e-06,
|
|
"loss": 2.605280876159668,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.2935924369747899,
|
|
"grad_norm": 7.6099755173044805,
|
|
"learning_rate": 9.755244755244755e-06,
|
|
"loss": 2.793330192565918,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.29411764705882354,
|
|
"grad_norm": 12.937770860201477,
|
|
"learning_rate": 9.772727272727273e-06,
|
|
"loss": 2.594179153442383,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.29464285714285715,
|
|
"grad_norm": 11.099571079329944,
|
|
"learning_rate": 9.79020979020979e-06,
|
|
"loss": 2.3938279151916504,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.29516806722689076,
|
|
"grad_norm": 10.38188967863859,
|
|
"learning_rate": 9.807692307692308e-06,
|
|
"loss": 3.088261127471924,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.2956932773109244,
|
|
"grad_norm": 11.598487313259774,
|
|
"learning_rate": 9.825174825174826e-06,
|
|
"loss": 3.0211265087127686,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.296218487394958,
|
|
"grad_norm": 14.51034321124632,
|
|
"learning_rate": 9.842657342657344e-06,
|
|
"loss": 3.197432279586792,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.2967436974789916,
|
|
"grad_norm": 25.63509858878865,
|
|
"learning_rate": 9.860139860139861e-06,
|
|
"loss": 2.7388033866882324,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.2972689075630252,
|
|
"grad_norm": 17.08255545887913,
|
|
"learning_rate": 9.877622377622379e-06,
|
|
"loss": 3.1421351432800293,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.2977941176470588,
|
|
"grad_norm": 14.491041482152639,
|
|
"learning_rate": 9.895104895104895e-06,
|
|
"loss": 2.792797088623047,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.29831932773109243,
|
|
"grad_norm": 11.516514066126438,
|
|
"learning_rate": 9.912587412587413e-06,
|
|
"loss": 3.216681957244873,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.29884453781512604,
|
|
"grad_norm": 15.277721323551134,
|
|
"learning_rate": 9.93006993006993e-06,
|
|
"loss": 2.2214996814727783,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.29936974789915966,
|
|
"grad_norm": 9.908637980577337,
|
|
"learning_rate": 9.94755244755245e-06,
|
|
"loss": 2.3919496536254883,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.29989495798319327,
|
|
"grad_norm": 11.157153523666555,
|
|
"learning_rate": 9.965034965034966e-06,
|
|
"loss": 2.538166046142578,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.3004201680672269,
|
|
"grad_norm": 11.56868921848655,
|
|
"learning_rate": 9.982517482517483e-06,
|
|
"loss": 2.437208414077759,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.3009453781512605,
|
|
"grad_norm": 8.366580030672965,
|
|
"learning_rate": 1e-05,
|
|
"loss": 2.69106388092041,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.3014705882352941,
|
|
"grad_norm": 8.285082482324782,
|
|
"learning_rate": 9.999999066071773e-06,
|
|
"loss": 2.420135259628296,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.3019957983193277,
|
|
"grad_norm": 18.337761587305817,
|
|
"learning_rate": 9.999996264287436e-06,
|
|
"loss": 2.271578311920166,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.3025210084033613,
|
|
"grad_norm": 18.091609827817756,
|
|
"learning_rate": 9.999991594648035e-06,
|
|
"loss": 3.1087193489074707,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.30304621848739494,
|
|
"grad_norm": 10.727849393430924,
|
|
"learning_rate": 9.999985057155316e-06,
|
|
"loss": 2.7490944862365723,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.30357142857142855,
|
|
"grad_norm": 15.761615720732681,
|
|
"learning_rate": 9.999976651811724e-06,
|
|
"loss": 1.8860646486282349,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.30409663865546216,
|
|
"grad_norm": 19.854802786471527,
|
|
"learning_rate": 9.999966378620396e-06,
|
|
"loss": 2.6921753883361816,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.30462184873949577,
|
|
"grad_norm": 14.669702649740051,
|
|
"learning_rate": 9.99995423758517e-06,
|
|
"loss": 2.938077449798584,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.30514705882352944,
|
|
"grad_norm": 13.794604407278413,
|
|
"learning_rate": 9.999940228710581e-06,
|
|
"loss": 2.822067975997925,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.30567226890756305,
|
|
"grad_norm": 13.655086277625093,
|
|
"learning_rate": 9.999924352001864e-06,
|
|
"loss": 2.838697910308838,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.30619747899159666,
|
|
"grad_norm": 10.698956690256324,
|
|
"learning_rate": 9.99990660746495e-06,
|
|
"loss": 3.6764464378356934,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.3067226890756303,
|
|
"grad_norm": 7.9104080477242,
|
|
"learning_rate": 9.999886995106467e-06,
|
|
"loss": 2.723898410797119,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.3072478991596639,
|
|
"grad_norm": 12.249625648796686,
|
|
"learning_rate": 9.99986551493374e-06,
|
|
"loss": 3.1120963096618652,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.3077731092436975,
|
|
"grad_norm": 14.439861817712407,
|
|
"learning_rate": 9.999842166954797e-06,
|
|
"loss": 2.98933744430542,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.3082983193277311,
|
|
"grad_norm": 14.302343534185399,
|
|
"learning_rate": 9.999816951178356e-06,
|
|
"loss": 2.622647285461426,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.3088235294117647,
|
|
"grad_norm": 38.37469850342068,
|
|
"learning_rate": 9.99978986761384e-06,
|
|
"loss": 2.3630852699279785,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.30934873949579833,
|
|
"grad_norm": 13.073607679409657,
|
|
"learning_rate": 9.999760916271368e-06,
|
|
"loss": 2.528589963912964,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.30987394957983194,
|
|
"grad_norm": 11.144728729471069,
|
|
"learning_rate": 9.99973009716175e-06,
|
|
"loss": 3.504377841949463,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.31039915966386555,
|
|
"grad_norm": 11.27268551570553,
|
|
"learning_rate": 9.999697410296505e-06,
|
|
"loss": 2.9286673069000244,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.31092436974789917,
|
|
"grad_norm": 17.618527421243975,
|
|
"learning_rate": 9.99966285568784e-06,
|
|
"loss": 3.3268320560455322,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.3114495798319328,
|
|
"grad_norm": 12.285303824543004,
|
|
"learning_rate": 9.999626433348664e-06,
|
|
"loss": 2.728505849838257,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.3119747899159664,
|
|
"grad_norm": 8.138294552039829,
|
|
"learning_rate": 9.999588143292584e-06,
|
|
"loss": 2.930448532104492,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.3125,
|
|
"grad_norm": 9.927962429564628,
|
|
"learning_rate": 9.999547985533905e-06,
|
|
"loss": 3.1505517959594727,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.3130252100840336,
|
|
"grad_norm": 5.922799171737987,
|
|
"learning_rate": 9.999505960087627e-06,
|
|
"loss": 2.8477983474731445,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.3135504201680672,
|
|
"grad_norm": 13.944936899177495,
|
|
"learning_rate": 9.999462066969451e-06,
|
|
"loss": 3.0594921112060547,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.31407563025210083,
|
|
"grad_norm": 16.090994911441584,
|
|
"learning_rate": 9.999416306195775e-06,
|
|
"loss": 2.8699352741241455,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.31460084033613445,
|
|
"grad_norm": 15.377709468639337,
|
|
"learning_rate": 9.999368677783691e-06,
|
|
"loss": 1.935495376586914,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.31512605042016806,
|
|
"grad_norm": 8.588582192713162,
|
|
"learning_rate": 9.999319181750993e-06,
|
|
"loss": 2.664287567138672,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.31565126050420167,
|
|
"grad_norm": 14.800403011758473,
|
|
"learning_rate": 9.999267818116173e-06,
|
|
"loss": 2.7809691429138184,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.3161764705882353,
|
|
"grad_norm": 11.687554735482323,
|
|
"learning_rate": 9.999214586898417e-06,
|
|
"loss": 2.6982336044311523,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.3167016806722689,
|
|
"grad_norm": 7.660259977047418,
|
|
"learning_rate": 9.99915948811761e-06,
|
|
"loss": 2.6417441368103027,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.3172268907563025,
|
|
"grad_norm": 15.272726013049263,
|
|
"learning_rate": 9.999102521794336e-06,
|
|
"loss": 2.302229404449463,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.3177521008403361,
|
|
"grad_norm": 6.73413097294637,
|
|
"learning_rate": 9.999043687949878e-06,
|
|
"loss": 3.0615999698638916,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.3182773109243697,
|
|
"grad_norm": 8.989449343418547,
|
|
"learning_rate": 9.998982986606214e-06,
|
|
"loss": 2.5908992290496826,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.31880252100840334,
|
|
"grad_norm": 11.279803822617207,
|
|
"learning_rate": 9.998920417786018e-06,
|
|
"loss": 2.433568000793457,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.31932773109243695,
|
|
"grad_norm": 10.712145177485182,
|
|
"learning_rate": 9.998855981512665e-06,
|
|
"loss": 2.5364127159118652,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.31985294117647056,
|
|
"grad_norm": 9.44916528577445,
|
|
"learning_rate": 9.998789677810226e-06,
|
|
"loss": 2.695469617843628,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.32037815126050423,
|
|
"grad_norm": 15.717511398115636,
|
|
"learning_rate": 9.998721506703473e-06,
|
|
"loss": 2.3882129192352295,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.32090336134453784,
|
|
"grad_norm": 16.480968240086398,
|
|
"learning_rate": 9.998651468217869e-06,
|
|
"loss": 3.1402809619903564,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.32142857142857145,
|
|
"grad_norm": 11.077375672666998,
|
|
"learning_rate": 9.99857956237958e-06,
|
|
"loss": 2.8062262535095215,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.32195378151260506,
|
|
"grad_norm": 7.429179430952721,
|
|
"learning_rate": 9.998505789215469e-06,
|
|
"loss": 2.9973835945129395,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.3224789915966387,
|
|
"grad_norm": 9.403513277942446,
|
|
"learning_rate": 9.998430148753095e-06,
|
|
"loss": 2.661933422088623,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.3230042016806723,
|
|
"grad_norm": 25.09734450638449,
|
|
"learning_rate": 9.998352641020714e-06,
|
|
"loss": 3.1352996826171875,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.3235294117647059,
|
|
"grad_norm": 20.659801255635177,
|
|
"learning_rate": 9.99827326604728e-06,
|
|
"loss": 2.932847261428833,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.3240546218487395,
|
|
"grad_norm": 9.229192777393214,
|
|
"learning_rate": 9.998192023862448e-06,
|
|
"loss": 2.449533462524414,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.3245798319327731,
|
|
"grad_norm": 17.396599362794852,
|
|
"learning_rate": 9.998108914496567e-06,
|
|
"loss": 2.4606223106384277,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.32510504201680673,
|
|
"grad_norm": 6.589611594303426,
|
|
"learning_rate": 9.998023937980683e-06,
|
|
"loss": 2.7767136096954346,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.32563025210084034,
|
|
"grad_norm": 13.587399901755328,
|
|
"learning_rate": 9.997937094346542e-06,
|
|
"loss": 3.07078218460083,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.32615546218487396,
|
|
"grad_norm": 13.757165659427377,
|
|
"learning_rate": 9.997848383626583e-06,
|
|
"loss": 2.532907009124756,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.32668067226890757,
|
|
"grad_norm": 15.826090861905238,
|
|
"learning_rate": 9.997757805853951e-06,
|
|
"loss": 1.9090718030929565,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.3272058823529412,
|
|
"grad_norm": 8.401318999924264,
|
|
"learning_rate": 9.99766536106248e-06,
|
|
"loss": 2.527989625930786,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.3277310924369748,
|
|
"grad_norm": 14.196562630426328,
|
|
"learning_rate": 9.997571049286706e-06,
|
|
"loss": 2.8110876083374023,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.3282563025210084,
|
|
"grad_norm": 11.535703972555819,
|
|
"learning_rate": 9.997474870561858e-06,
|
|
"loss": 3.0162670612335205,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.328781512605042,
|
|
"grad_norm": 14.388689512029586,
|
|
"learning_rate": 9.99737682492387e-06,
|
|
"loss": 2.6724231243133545,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.3293067226890756,
|
|
"grad_norm": 20.552179177835775,
|
|
"learning_rate": 9.997276912409369e-06,
|
|
"loss": 2.8142333030700684,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.32983193277310924,
|
|
"grad_norm": 13.237498962761254,
|
|
"learning_rate": 9.997175133055676e-06,
|
|
"loss": 2.8185126781463623,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.33035714285714285,
|
|
"grad_norm": 9.583123197551005,
|
|
"learning_rate": 9.997071486900813e-06,
|
|
"loss": 2.719855785369873,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.33088235294117646,
|
|
"grad_norm": 13.478794221532347,
|
|
"learning_rate": 9.996965973983503e-06,
|
|
"loss": 3.1515822410583496,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.3314075630252101,
|
|
"grad_norm": 10.978117747492535,
|
|
"learning_rate": 9.996858594343159e-06,
|
|
"loss": 3.1971404552459717,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.3319327731092437,
|
|
"grad_norm": 18.07855511683476,
|
|
"learning_rate": 9.996749348019898e-06,
|
|
"loss": 3.107539176940918,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.3324579831932773,
|
|
"grad_norm": 8.18501392389578,
|
|
"learning_rate": 9.996638235054527e-06,
|
|
"loss": 2.682018280029297,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.3329831932773109,
|
|
"grad_norm": 12.184516047650115,
|
|
"learning_rate": 9.996525255488559e-06,
|
|
"loss": 2.6156351566314697,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.3335084033613445,
|
|
"grad_norm": 26.49471236163556,
|
|
"learning_rate": 9.996410409364198e-06,
|
|
"loss": 3.175257682800293,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.33403361344537813,
|
|
"grad_norm": 7.755694969661678,
|
|
"learning_rate": 9.996293696724347e-06,
|
|
"loss": 2.5734364986419678,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.33455882352941174,
|
|
"grad_norm": 9.856280234345837,
|
|
"learning_rate": 9.996175117612608e-06,
|
|
"loss": 2.624603271484375,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.33508403361344535,
|
|
"grad_norm": 9.377322848668618,
|
|
"learning_rate": 9.996054672073276e-06,
|
|
"loss": 2.9139628410339355,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.33560924369747897,
|
|
"grad_norm": 8.708097433883548,
|
|
"learning_rate": 9.995932360151348e-06,
|
|
"loss": 2.903323173522949,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.33613445378151263,
|
|
"grad_norm": 22.5662372687559,
|
|
"learning_rate": 9.995808181892516e-06,
|
|
"loss": 2.6533048152923584,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.33665966386554624,
|
|
"grad_norm": 11.27337849539793,
|
|
"learning_rate": 9.99568213734317e-06,
|
|
"loss": 2.5544841289520264,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.33718487394957986,
|
|
"grad_norm": 25.097768822401978,
|
|
"learning_rate": 9.995554226550395e-06,
|
|
"loss": 3.004918098449707,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.33771008403361347,
|
|
"grad_norm": 13.747250602715669,
|
|
"learning_rate": 9.995424449561974e-06,
|
|
"loss": 3.2306368350982666,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.3382352941176471,
|
|
"grad_norm": 8.706127255035039,
|
|
"learning_rate": 9.995292806426392e-06,
|
|
"loss": 2.882741928100586,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.3387605042016807,
|
|
"grad_norm": 12.898703667341863,
|
|
"learning_rate": 9.995159297192824e-06,
|
|
"loss": 2.462981700897217,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.3392857142857143,
|
|
"grad_norm": 8.940322682188675,
|
|
"learning_rate": 9.995023921911146e-06,
|
|
"loss": 2.655332088470459,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.3398109243697479,
|
|
"grad_norm": 8.829986905443867,
|
|
"learning_rate": 9.99488668063193e-06,
|
|
"loss": 3.0068254470825195,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.3403361344537815,
|
|
"grad_norm": 8.735286812318217,
|
|
"learning_rate": 9.994747573406444e-06,
|
|
"loss": 2.6651973724365234,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.34086134453781514,
|
|
"grad_norm": 10.27315507806615,
|
|
"learning_rate": 9.99460660028666e-06,
|
|
"loss": 2.4464921951293945,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.34138655462184875,
|
|
"grad_norm": 7.155452815715878,
|
|
"learning_rate": 9.994463761325235e-06,
|
|
"loss": 2.3645894527435303,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.34191176470588236,
|
|
"grad_norm": 12.28602457587812,
|
|
"learning_rate": 9.994319056575532e-06,
|
|
"loss": 2.9508180618286133,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.34243697478991597,
|
|
"grad_norm": 8.509858158883015,
|
|
"learning_rate": 9.99417248609161e-06,
|
|
"loss": 3.0886614322662354,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.3429621848739496,
|
|
"grad_norm": 7.63107284880161,
|
|
"learning_rate": 9.994024049928222e-06,
|
|
"loss": 3.215698480606079,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.3434873949579832,
|
|
"grad_norm": 16.02061369261867,
|
|
"learning_rate": 9.99387374814082e-06,
|
|
"loss": 1.5853204727172852,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.3440126050420168,
|
|
"grad_norm": 12.599815277623422,
|
|
"learning_rate": 9.99372158078555e-06,
|
|
"loss": 2.880288600921631,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.3445378151260504,
|
|
"grad_norm": 23.718360310240516,
|
|
"learning_rate": 9.99356754791926e-06,
|
|
"loss": 2.114903450012207,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.34506302521008403,
|
|
"grad_norm": 15.218989533301585,
|
|
"learning_rate": 9.993411649599494e-06,
|
|
"loss": 2.7294764518737793,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.34558823529411764,
|
|
"grad_norm": 10.25377666124804,
|
|
"learning_rate": 9.993253885884488e-06,
|
|
"loss": 2.971414566040039,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.34611344537815125,
|
|
"grad_norm": 10.812972960374735,
|
|
"learning_rate": 9.993094256833178e-06,
|
|
"loss": 2.6103994846343994,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.34663865546218486,
|
|
"grad_norm": 8.598787979315924,
|
|
"learning_rate": 9.992932762505198e-06,
|
|
"loss": 2.697841167449951,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.3471638655462185,
|
|
"grad_norm": 14.817163668782701,
|
|
"learning_rate": 9.992769402960878e-06,
|
|
"loss": 2.9950594902038574,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.3476890756302521,
|
|
"grad_norm": 11.638383460998597,
|
|
"learning_rate": 9.992604178261242e-06,
|
|
"loss": 2.944634437561035,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.3482142857142857,
|
|
"grad_norm": 10.830593207520312,
|
|
"learning_rate": 9.992437088468016e-06,
|
|
"loss": 2.856689214706421,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.3487394957983193,
|
|
"grad_norm": 10.553427456128704,
|
|
"learning_rate": 9.992268133643622e-06,
|
|
"loss": 2.556269645690918,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.3492647058823529,
|
|
"grad_norm": 18.737148533839182,
|
|
"learning_rate": 9.99209731385117e-06,
|
|
"loss": 2.78225040435791,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.34978991596638653,
|
|
"grad_norm": 22.82662907356697,
|
|
"learning_rate": 9.991924629154476e-06,
|
|
"loss": 2.0274147987365723,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.35031512605042014,
|
|
"grad_norm": 15.543809015361044,
|
|
"learning_rate": 9.991750079618054e-06,
|
|
"loss": 2.732177734375,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.35084033613445376,
|
|
"grad_norm": 9.785556149667748,
|
|
"learning_rate": 9.991573665307108e-06,
|
|
"loss": 2.142753839492798,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.35136554621848737,
|
|
"grad_norm": 36.13433828190088,
|
|
"learning_rate": 9.99139538628754e-06,
|
|
"loss": 2.969231128692627,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.35189075630252103,
|
|
"grad_norm": 10.418003369691215,
|
|
"learning_rate": 9.991215242625948e-06,
|
|
"loss": 3.4130592346191406,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.35241596638655465,
|
|
"grad_norm": 10.45910909072077,
|
|
"learning_rate": 9.991033234389636e-06,
|
|
"loss": 2.8507094383239746,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.35294117647058826,
|
|
"grad_norm": 11.296132395257468,
|
|
"learning_rate": 9.99084936164659e-06,
|
|
"loss": 2.2903499603271484,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.35346638655462187,
|
|
"grad_norm": 11.822520466773746,
|
|
"learning_rate": 9.990663624465504e-06,
|
|
"loss": 2.813413381576538,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.3539915966386555,
|
|
"grad_norm": 12.45634710640318,
|
|
"learning_rate": 9.990476022915761e-06,
|
|
"loss": 3.4090490341186523,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.3545168067226891,
|
|
"grad_norm": 12.816620367166667,
|
|
"learning_rate": 9.990286557067443e-06,
|
|
"loss": 2.671079635620117,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.3550420168067227,
|
|
"grad_norm": 7.775861620360174,
|
|
"learning_rate": 9.990095226991334e-06,
|
|
"loss": 3.0635085105895996,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.3555672268907563,
|
|
"grad_norm": 7.128343265286214,
|
|
"learning_rate": 9.989902032758904e-06,
|
|
"loss": 2.5118045806884766,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.3560924369747899,
|
|
"grad_norm": 9.23231361002266,
|
|
"learning_rate": 9.989706974442329e-06,
|
|
"loss": 2.978017807006836,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.35661764705882354,
|
|
"grad_norm": 11.130901634177837,
|
|
"learning_rate": 9.989510052114473e-06,
|
|
"loss": 2.4600491523742676,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.35714285714285715,
|
|
"grad_norm": 46.64047674219602,
|
|
"learning_rate": 9.989311265848905e-06,
|
|
"loss": 2.603870153427124,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.35766806722689076,
|
|
"grad_norm": 10.503936598310489,
|
|
"learning_rate": 9.989110615719882e-06,
|
|
"loss": 2.6567111015319824,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.3581932773109244,
|
|
"grad_norm": 12.325303927314826,
|
|
"learning_rate": 9.988908101802361e-06,
|
|
"loss": 2.937870740890503,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.358718487394958,
|
|
"grad_norm": 9.066199122716661,
|
|
"learning_rate": 9.988703724172e-06,
|
|
"loss": 2.1229398250579834,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.3592436974789916,
|
|
"grad_norm": 15.897454227218814,
|
|
"learning_rate": 9.988497482905145e-06,
|
|
"loss": 3.0912346839904785,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.3597689075630252,
|
|
"grad_norm": 20.734995219911607,
|
|
"learning_rate": 9.988289378078842e-06,
|
|
"loss": 3.042353630065918,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.3602941176470588,
|
|
"grad_norm": 13.937087303746189,
|
|
"learning_rate": 9.988079409770832e-06,
|
|
"loss": 2.5730111598968506,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.36081932773109243,
|
|
"grad_norm": 18.68167099777117,
|
|
"learning_rate": 9.987867578059557e-06,
|
|
"loss": 3.8723182678222656,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.36134453781512604,
|
|
"grad_norm": 14.050087711676303,
|
|
"learning_rate": 9.987653883024147e-06,
|
|
"loss": 2.8078064918518066,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.36186974789915966,
|
|
"grad_norm": 10.431859572774215,
|
|
"learning_rate": 9.987438324744437e-06,
|
|
"loss": 3.016974449157715,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.36239495798319327,
|
|
"grad_norm": 11.822299541853985,
|
|
"learning_rate": 9.987220903300947e-06,
|
|
"loss": 3.129451036453247,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.3629201680672269,
|
|
"grad_norm": 9.636064124635695,
|
|
"learning_rate": 9.987001618774906e-06,
|
|
"loss": 2.768364667892456,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.3634453781512605,
|
|
"grad_norm": 7.7086105081417875,
|
|
"learning_rate": 9.986780471248228e-06,
|
|
"loss": 2.748843193054199,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.3639705882352941,
|
|
"grad_norm": 8.25025639760207,
|
|
"learning_rate": 9.986557460803527e-06,
|
|
"loss": 2.6695477962493896,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.3644957983193277,
|
|
"grad_norm": 21.101621960287538,
|
|
"learning_rate": 9.98633258752412e-06,
|
|
"loss": 2.9242889881134033,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.3650210084033613,
|
|
"grad_norm": 7.915319275851179,
|
|
"learning_rate": 9.986105851494003e-06,
|
|
"loss": 2.942355155944824,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.36554621848739494,
|
|
"grad_norm": 9.792091922597695,
|
|
"learning_rate": 9.985877252797887e-06,
|
|
"loss": 1.7359246015548706,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.36607142857142855,
|
|
"grad_norm": 12.917423002334813,
|
|
"learning_rate": 9.985646791521165e-06,
|
|
"loss": 2.9983890056610107,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.36659663865546216,
|
|
"grad_norm": 35.091463458302385,
|
|
"learning_rate": 9.98541446774993e-06,
|
|
"loss": 3.2080330848693848,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.36712184873949577,
|
|
"grad_norm": 12.125766825502915,
|
|
"learning_rate": 9.985180281570976e-06,
|
|
"loss": 2.50394868850708,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.36764705882352944,
|
|
"grad_norm": 7.844200768249517,
|
|
"learning_rate": 9.984944233071785e-06,
|
|
"loss": 2.4198319911956787,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.36817226890756305,
|
|
"grad_norm": 10.404307824162935,
|
|
"learning_rate": 9.984706322340539e-06,
|
|
"loss": 2.928926467895508,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.36869747899159666,
|
|
"grad_norm": 10.490212869826033,
|
|
"learning_rate": 9.984466549466112e-06,
|
|
"loss": 2.9764275550842285,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.3692226890756303,
|
|
"grad_norm": 9.884497573900134,
|
|
"learning_rate": 9.98422491453808e-06,
|
|
"loss": 2.9621715545654297,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.3697478991596639,
|
|
"grad_norm": 9.942147288667627,
|
|
"learning_rate": 9.98398141764671e-06,
|
|
"loss": 2.4516334533691406,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.3702731092436975,
|
|
"grad_norm": 12.937384346927333,
|
|
"learning_rate": 9.983736058882965e-06,
|
|
"loss": 2.8038949966430664,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.3707983193277311,
|
|
"grad_norm": 8.693898923683015,
|
|
"learning_rate": 9.983488838338504e-06,
|
|
"loss": 3.0094220638275146,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.3713235294117647,
|
|
"grad_norm": 10.708732196701385,
|
|
"learning_rate": 9.98323975610568e-06,
|
|
"loss": 2.505156993865967,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.37184873949579833,
|
|
"grad_norm": 8.262363197685096,
|
|
"learning_rate": 9.982988812277544e-06,
|
|
"loss": 2.4521894454956055,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.37237394957983194,
|
|
"grad_norm": 8.759386744985079,
|
|
"learning_rate": 9.982736006947842e-06,
|
|
"loss": 2.5744028091430664,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.37289915966386555,
|
|
"grad_norm": 17.34528310589552,
|
|
"learning_rate": 9.982481340211016e-06,
|
|
"loss": 3.3058454990386963,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.37342436974789917,
|
|
"grad_norm": 22.382439682764094,
|
|
"learning_rate": 9.9822248121622e-06,
|
|
"loss": 3.381484031677246,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.3739495798319328,
|
|
"grad_norm": 32.160743822492435,
|
|
"learning_rate": 9.981966422897225e-06,
|
|
"loss": 2.116076946258545,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.3744747899159664,
|
|
"grad_norm": 11.865491148282732,
|
|
"learning_rate": 9.98170617251262e-06,
|
|
"loss": 2.513551950454712,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.375,
|
|
"grad_norm": 9.803448733419238,
|
|
"learning_rate": 9.981444061105607e-06,
|
|
"loss": 2.6778907775878906,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.3755252100840336,
|
|
"grad_norm": 15.056919870056147,
|
|
"learning_rate": 9.9811800887741e-06,
|
|
"loss": 2.695645332336426,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.3760504201680672,
|
|
"grad_norm": 7.337704853621016,
|
|
"learning_rate": 9.980914255616716e-06,
|
|
"loss": 2.4225122928619385,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.37657563025210083,
|
|
"grad_norm": 51.695039374935995,
|
|
"learning_rate": 9.98064656173276e-06,
|
|
"loss": 3.8589346408843994,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.37710084033613445,
|
|
"grad_norm": 16.11942131442461,
|
|
"learning_rate": 9.980377007222236e-06,
|
|
"loss": 2.013482093811035,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.37762605042016806,
|
|
"grad_norm": 9.933013258491656,
|
|
"learning_rate": 9.980105592185838e-06,
|
|
"loss": 2.68209171295166,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.37815126050420167,
|
|
"grad_norm": 14.001108544199091,
|
|
"learning_rate": 9.979832316724965e-06,
|
|
"loss": 2.5532543659210205,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.3786764705882353,
|
|
"grad_norm": 7.912448258198673,
|
|
"learning_rate": 9.979557180941702e-06,
|
|
"loss": 2.7311716079711914,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.3792016806722689,
|
|
"grad_norm": 13.435411246242886,
|
|
"learning_rate": 9.979280184938829e-06,
|
|
"loss": 2.514404296875,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.3797268907563025,
|
|
"grad_norm": 13.030047357377368,
|
|
"learning_rate": 9.979001328819828e-06,
|
|
"loss": 3.085047721862793,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.3802521008403361,
|
|
"grad_norm": 9.747365344941048,
|
|
"learning_rate": 9.97872061268887e-06,
|
|
"loss": 2.680142641067505,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.3807773109243697,
|
|
"grad_norm": 8.138043787406302,
|
|
"learning_rate": 9.978438036650822e-06,
|
|
"loss": 2.492799758911133,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.38130252100840334,
|
|
"grad_norm": 9.370875617288384,
|
|
"learning_rate": 9.978153600811247e-06,
|
|
"loss": 2.7892208099365234,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.38182773109243695,
|
|
"grad_norm": 8.580976998458176,
|
|
"learning_rate": 9.977867305276403e-06,
|
|
"loss": 2.453538417816162,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.38235294117647056,
|
|
"grad_norm": 10.991432407863732,
|
|
"learning_rate": 9.97757915015324e-06,
|
|
"loss": 2.6907737255096436,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.38287815126050423,
|
|
"grad_norm": 14.510205789056386,
|
|
"learning_rate": 9.977289135549404e-06,
|
|
"loss": 2.8692243099212646,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.38340336134453784,
|
|
"grad_norm": 7.913979618741548,
|
|
"learning_rate": 9.976997261573239e-06,
|
|
"loss": 2.712153911590576,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.38392857142857145,
|
|
"grad_norm": 6.721599599733559,
|
|
"learning_rate": 9.976703528333777e-06,
|
|
"loss": 3.417105197906494,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.38445378151260506,
|
|
"grad_norm": 9.974817268351917,
|
|
"learning_rate": 9.976407935940753e-06,
|
|
"loss": 2.704678773880005,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.3849789915966387,
|
|
"grad_norm": 17.857088853020603,
|
|
"learning_rate": 9.976110484504587e-06,
|
|
"loss": 2.9578113555908203,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.3855042016806723,
|
|
"grad_norm": 10.613227127445194,
|
|
"learning_rate": 9.975811174136401e-06,
|
|
"loss": 2.7810139656066895,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.3860294117647059,
|
|
"grad_norm": 10.190958240778137,
|
|
"learning_rate": 9.97551000494801e-06,
|
|
"loss": 3.049150228500366,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.3865546218487395,
|
|
"grad_norm": 68.69548371753692,
|
|
"learning_rate": 9.975206977051919e-06,
|
|
"loss": 4.675893783569336,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.3870798319327731,
|
|
"grad_norm": 15.76881007178103,
|
|
"learning_rate": 9.974902090561331e-06,
|
|
"loss": 2.786167621612549,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.38760504201680673,
|
|
"grad_norm": 10.862519522476427,
|
|
"learning_rate": 9.974595345590146e-06,
|
|
"loss": 3.2471139430999756,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.38813025210084034,
|
|
"grad_norm": 10.057154507380792,
|
|
"learning_rate": 9.97428674225295e-06,
|
|
"loss": 3.0831401348114014,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.38865546218487396,
|
|
"grad_norm": 9.033985715456321,
|
|
"learning_rate": 9.973976280665034e-06,
|
|
"loss": 2.7748661041259766,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.38918067226890757,
|
|
"grad_norm": 6.999164319444783,
|
|
"learning_rate": 9.973663960942373e-06,
|
|
"loss": 3.044131278991699,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.3897058823529412,
|
|
"grad_norm": 12.913416251410435,
|
|
"learning_rate": 9.973349783201643e-06,
|
|
"loss": 3.110964775085449,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.3902310924369748,
|
|
"grad_norm": 8.289008376927272,
|
|
"learning_rate": 9.97303374756021e-06,
|
|
"loss": 3.1857125759124756,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.3907563025210084,
|
|
"grad_norm": 12.390872525617006,
|
|
"learning_rate": 9.97271585413614e-06,
|
|
"loss": 2.7156639099121094,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.391281512605042,
|
|
"grad_norm": 9.704288372807804,
|
|
"learning_rate": 9.972396103048184e-06,
|
|
"loss": 2.800199031829834,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.3918067226890756,
|
|
"grad_norm": 5.503059294865545,
|
|
"learning_rate": 9.972074494415794e-06,
|
|
"loss": 2.6597952842712402,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.39233193277310924,
|
|
"grad_norm": 14.629486559448079,
|
|
"learning_rate": 9.971751028359113e-06,
|
|
"loss": 2.8860340118408203,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.39285714285714285,
|
|
"grad_norm": 9.865892989389492,
|
|
"learning_rate": 9.971425704998979e-06,
|
|
"loss": 2.8084075450897217,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.39338235294117646,
|
|
"grad_norm": 13.660807237898522,
|
|
"learning_rate": 9.971098524456925e-06,
|
|
"loss": 2.41198468208313,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.3939075630252101,
|
|
"grad_norm": 16.02642625865953,
|
|
"learning_rate": 9.970769486855175e-06,
|
|
"loss": 2.6530814170837402,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.3944327731092437,
|
|
"grad_norm": 9.93788810955185,
|
|
"learning_rate": 9.970438592316646e-06,
|
|
"loss": 2.247192859649658,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.3949579831932773,
|
|
"grad_norm": 17.000130461529395,
|
|
"learning_rate": 9.970105840964954e-06,
|
|
"loss": 2.9603047370910645,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.3954831932773109,
|
|
"grad_norm": 18.104368897922804,
|
|
"learning_rate": 9.969771232924404e-06,
|
|
"loss": 2.4636471271514893,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.3960084033613445,
|
|
"grad_norm": 11.678759570892067,
|
|
"learning_rate": 9.969434768319994e-06,
|
|
"loss": 2.67864990234375,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.39653361344537813,
|
|
"grad_norm": 9.954516151022306,
|
|
"learning_rate": 9.969096447277421e-06,
|
|
"loss": 2.601876735687256,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.39705882352941174,
|
|
"grad_norm": 10.04194683094288,
|
|
"learning_rate": 9.96875626992307e-06,
|
|
"loss": 2.7278666496276855,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.39758403361344535,
|
|
"grad_norm": 9.859608719702951,
|
|
"learning_rate": 9.968414236384022e-06,
|
|
"loss": 2.5173091888427734,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.39810924369747897,
|
|
"grad_norm": 8.445956395716482,
|
|
"learning_rate": 9.968070346788052e-06,
|
|
"loss": 2.7311062812805176,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.39863445378151263,
|
|
"grad_norm": 11.538558785627409,
|
|
"learning_rate": 9.967724601263624e-06,
|
|
"loss": 2.5492570400238037,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.39915966386554624,
|
|
"grad_norm": 16.59494308416059,
|
|
"learning_rate": 9.967376999939902e-06,
|
|
"loss": 3.0493645668029785,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.39968487394957986,
|
|
"grad_norm": 11.943515039644934,
|
|
"learning_rate": 9.967027542946739e-06,
|
|
"loss": 3.117658853530884,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.40021008403361347,
|
|
"grad_norm": 12.660847363691728,
|
|
"learning_rate": 9.96667623041468e-06,
|
|
"loss": 2.7667980194091797,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.4007352941176471,
|
|
"grad_norm": 11.360923456683278,
|
|
"learning_rate": 9.96632306247497e-06,
|
|
"loss": 3.1187093257904053,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.4012605042016807,
|
|
"grad_norm": 18.860213599392665,
|
|
"learning_rate": 9.965968039259537e-06,
|
|
"loss": 2.6678061485290527,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.4017857142857143,
|
|
"grad_norm": 5.953990702595949,
|
|
"learning_rate": 9.965611160901008e-06,
|
|
"loss": 2.969163417816162,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.4023109243697479,
|
|
"grad_norm": 11.83044277913696,
|
|
"learning_rate": 9.965252427532707e-06,
|
|
"loss": 2.681096315383911,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.4028361344537815,
|
|
"grad_norm": 48.370836508184965,
|
|
"learning_rate": 9.964891839288644e-06,
|
|
"loss": 4.605452537536621,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.40336134453781514,
|
|
"grad_norm": 17.52603230116782,
|
|
"learning_rate": 9.964529396303524e-06,
|
|
"loss": 3.2965004444122314,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.40388655462184875,
|
|
"grad_norm": 9.843106150407221,
|
|
"learning_rate": 9.964165098712745e-06,
|
|
"loss": 3.0530214309692383,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.40441176470588236,
|
|
"grad_norm": 60.38131547853319,
|
|
"learning_rate": 9.9637989466524e-06,
|
|
"loss": 2.8514883518218994,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.40493697478991597,
|
|
"grad_norm": 12.992236346655288,
|
|
"learning_rate": 9.96343094025927e-06,
|
|
"loss": 2.743839979171753,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.4054621848739496,
|
|
"grad_norm": 7.789092687155614,
|
|
"learning_rate": 9.963061079670833e-06,
|
|
"loss": 2.968172788619995,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.4059873949579832,
|
|
"grad_norm": 11.105990576237419,
|
|
"learning_rate": 9.962689365025259e-06,
|
|
"loss": 2.259955406188965,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.4065126050420168,
|
|
"grad_norm": 14.171652130560842,
|
|
"learning_rate": 9.96231579646141e-06,
|
|
"loss": 3.6340267658233643,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.4070378151260504,
|
|
"grad_norm": 7.142663329318153,
|
|
"learning_rate": 9.96194037411884e-06,
|
|
"loss": 2.7550954818725586,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.40756302521008403,
|
|
"grad_norm": 20.856166454476007,
|
|
"learning_rate": 9.961563098137795e-06,
|
|
"loss": 2.0924081802368164,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.40808823529411764,
|
|
"grad_norm": 9.725403375549687,
|
|
"learning_rate": 9.961183968659217e-06,
|
|
"loss": 2.6671392917633057,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.40861344537815125,
|
|
"grad_norm": 18.13216494899129,
|
|
"learning_rate": 9.960802985824734e-06,
|
|
"loss": 3.811979293823242,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.40913865546218486,
|
|
"grad_norm": 9.603302379402304,
|
|
"learning_rate": 9.960420149776674e-06,
|
|
"loss": 3.0309672355651855,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.4096638655462185,
|
|
"grad_norm": 7.60530093348673,
|
|
"learning_rate": 9.960035460658052e-06,
|
|
"loss": 2.6865861415863037,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.4101890756302521,
|
|
"grad_norm": 9.150805363464082,
|
|
"learning_rate": 9.959648918612576e-06,
|
|
"loss": 2.356776714324951,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.4107142857142857,
|
|
"grad_norm": 19.764875833019317,
|
|
"learning_rate": 9.959260523784648e-06,
|
|
"loss": 2.4339756965637207,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.4112394957983193,
|
|
"grad_norm": 6.714849553660578,
|
|
"learning_rate": 9.958870276319364e-06,
|
|
"loss": 2.9411540031433105,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.4117647058823529,
|
|
"grad_norm": 12.882346390209301,
|
|
"learning_rate": 9.958478176362503e-06,
|
|
"loss": 2.516554355621338,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.41228991596638653,
|
|
"grad_norm": 15.075731597661168,
|
|
"learning_rate": 9.958084224060547e-06,
|
|
"loss": 2.9573659896850586,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.41281512605042014,
|
|
"grad_norm": 18.918437370618513,
|
|
"learning_rate": 9.957688419560662e-06,
|
|
"loss": 3.919832229614258,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.41334033613445376,
|
|
"grad_norm": 42.32396530125621,
|
|
"learning_rate": 9.957290763010714e-06,
|
|
"loss": 3.0770509243011475,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.41386554621848737,
|
|
"grad_norm": 33.11870145412547,
|
|
"learning_rate": 9.95689125455925e-06,
|
|
"loss": 3.0764617919921875,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.41439075630252103,
|
|
"grad_norm": 14.606636259089811,
|
|
"learning_rate": 9.956489894355521e-06,
|
|
"loss": 2.8072032928466797,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.41491596638655465,
|
|
"grad_norm": 12.868362449244204,
|
|
"learning_rate": 9.95608668254946e-06,
|
|
"loss": 2.5233964920043945,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.41544117647058826,
|
|
"grad_norm": 17.899517071794047,
|
|
"learning_rate": 9.955681619291695e-06,
|
|
"loss": 3.2196409702301025,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.41596638655462187,
|
|
"grad_norm": 11.84349501647458,
|
|
"learning_rate": 9.955274704733547e-06,
|
|
"loss": 2.637895107269287,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.4164915966386555,
|
|
"grad_norm": 13.051509389596445,
|
|
"learning_rate": 9.954865939027028e-06,
|
|
"loss": 2.5700843334198,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.4170168067226891,
|
|
"grad_norm": 22.4530983602669,
|
|
"learning_rate": 9.95445532232484e-06,
|
|
"loss": 3.3792872428894043,
|
|
"step": 794
|
|
},
|
|
{
|
|
"epoch": 0.4175420168067227,
|
|
"grad_norm": 16.66009099744122,
|
|
"learning_rate": 9.954042854780381e-06,
|
|
"loss": 2.5564870834350586,
|
|
"step": 795
|
|
},
|
|
{
|
|
"epoch": 0.4180672268907563,
|
|
"grad_norm": 16.41759956373412,
|
|
"learning_rate": 9.953628536547732e-06,
|
|
"loss": 2.229680061340332,
|
|
"step": 796
|
|
},
|
|
{
|
|
"epoch": 0.4185924369747899,
|
|
"grad_norm": 5.95355444174316,
|
|
"learning_rate": 9.953212367781675e-06,
|
|
"loss": 2.8148036003112793,
|
|
"step": 797
|
|
},
|
|
{
|
|
"epoch": 0.41911764705882354,
|
|
"grad_norm": 11.288180536237046,
|
|
"learning_rate": 9.952794348637674e-06,
|
|
"loss": 4.047574520111084,
|
|
"step": 798
|
|
},
|
|
{
|
|
"epoch": 0.41964285714285715,
|
|
"grad_norm": 9.68707893790397,
|
|
"learning_rate": 9.952374479271894e-06,
|
|
"loss": 2.795058012008667,
|
|
"step": 799
|
|
},
|
|
{
|
|
"epoch": 0.42016806722689076,
|
|
"grad_norm": 28.32561829635747,
|
|
"learning_rate": 9.95195275984118e-06,
|
|
"loss": 3.3051700592041016,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.4206932773109244,
|
|
"grad_norm": 11.875533469159889,
|
|
"learning_rate": 9.95152919050308e-06,
|
|
"loss": 2.640284299850464,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.421218487394958,
|
|
"grad_norm": 16.848698170104978,
|
|
"learning_rate": 9.951103771415826e-06,
|
|
"loss": 2.74238920211792,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.4217436974789916,
|
|
"grad_norm": 12.024301086715802,
|
|
"learning_rate": 9.95067650273834e-06,
|
|
"loss": 2.653933048248291,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.4222689075630252,
|
|
"grad_norm": 9.244446014771947,
|
|
"learning_rate": 9.95024738463024e-06,
|
|
"loss": 2.8780946731567383,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.4227941176470588,
|
|
"grad_norm": 13.759649085993791,
|
|
"learning_rate": 9.949816417251831e-06,
|
|
"loss": 2.6063790321350098,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.42331932773109243,
|
|
"grad_norm": 6.105600186230813,
|
|
"learning_rate": 9.949383600764112e-06,
|
|
"loss": 2.5861740112304688,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.42384453781512604,
|
|
"grad_norm": 11.61006434807629,
|
|
"learning_rate": 9.948948935328766e-06,
|
|
"loss": 2.3922276496887207,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.42436974789915966,
|
|
"grad_norm": 21.320775980203457,
|
|
"learning_rate": 9.948512421108175e-06,
|
|
"loss": 2.4385743141174316,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.42489495798319327,
|
|
"grad_norm": 15.837796454311238,
|
|
"learning_rate": 9.948074058265409e-06,
|
|
"loss": 3.436558246612549,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.4254201680672269,
|
|
"grad_norm": 12.301838472836549,
|
|
"learning_rate": 9.947633846964225e-06,
|
|
"loss": 2.4982149600982666,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.4259453781512605,
|
|
"grad_norm": 10.290693200383759,
|
|
"learning_rate": 9.947191787369075e-06,
|
|
"loss": 2.629467248916626,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.4264705882352941,
|
|
"grad_norm": 15.301896288000805,
|
|
"learning_rate": 9.946747879645101e-06,
|
|
"loss": 2.895120620727539,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.4269957983193277,
|
|
"grad_norm": 12.005977233483202,
|
|
"learning_rate": 9.94630212395813e-06,
|
|
"loss": 2.616076946258545,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.4275210084033613,
|
|
"grad_norm": 9.06535556509353,
|
|
"learning_rate": 9.94585452047469e-06,
|
|
"loss": 3.250288486480713,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.42804621848739494,
|
|
"grad_norm": 7.33540283021128,
|
|
"learning_rate": 9.945405069361985e-06,
|
|
"loss": 2.5006253719329834,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.42857142857142855,
|
|
"grad_norm": 7.137745650856612,
|
|
"learning_rate": 9.944953770787924e-06,
|
|
"loss": 2.8030080795288086,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.42909663865546216,
|
|
"grad_norm": 22.712138997813216,
|
|
"learning_rate": 9.944500624921094e-06,
|
|
"loss": 2.7877559661865234,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.42962184873949577,
|
|
"grad_norm": 22.264028742258198,
|
|
"learning_rate": 9.944045631930782e-06,
|
|
"loss": 2.2608823776245117,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.43014705882352944,
|
|
"grad_norm": 10.743172030391833,
|
|
"learning_rate": 9.943588791986956e-06,
|
|
"loss": 2.655961036682129,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.43067226890756305,
|
|
"grad_norm": 15.720922909163425,
|
|
"learning_rate": 9.943130105260281e-06,
|
|
"loss": 2.954380750656128,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.43119747899159666,
|
|
"grad_norm": 8.442283187335915,
|
|
"learning_rate": 9.942669571922108e-06,
|
|
"loss": 2.827402114868164,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.4317226890756303,
|
|
"grad_norm": 13.36121904866565,
|
|
"learning_rate": 9.94220719214448e-06,
|
|
"loss": 2.8097915649414062,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.4322478991596639,
|
|
"grad_norm": 12.94135436654749,
|
|
"learning_rate": 9.941742966100128e-06,
|
|
"loss": 2.3562488555908203,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.4327731092436975,
|
|
"grad_norm": 19.987951318309037,
|
|
"learning_rate": 9.941276893962472e-06,
|
|
"loss": 3.1615231037139893,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.4332983193277311,
|
|
"grad_norm": 11.597688274021062,
|
|
"learning_rate": 9.940808975905627e-06,
|
|
"loss": 2.806821584701538,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.4338235294117647,
|
|
"grad_norm": 14.77858087554389,
|
|
"learning_rate": 9.94033921210439e-06,
|
|
"loss": 3.246838331222534,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.43434873949579833,
|
|
"grad_norm": 12.827285468107656,
|
|
"learning_rate": 9.939867602734255e-06,
|
|
"loss": 3.145732879638672,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.43487394957983194,
|
|
"grad_norm": 12.195643133192908,
|
|
"learning_rate": 9.939394147971398e-06,
|
|
"loss": 2.699291706085205,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.43539915966386555,
|
|
"grad_norm": 8.998726860136895,
|
|
"learning_rate": 9.93891884799269e-06,
|
|
"loss": 2.623117446899414,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.43592436974789917,
|
|
"grad_norm": 9.88074255664715,
|
|
"learning_rate": 9.938441702975689e-06,
|
|
"loss": 2.0797412395477295,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.4364495798319328,
|
|
"grad_norm": 24.586238736363498,
|
|
"learning_rate": 9.937962713098644e-06,
|
|
"loss": 2.6907668113708496,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.4369747899159664,
|
|
"grad_norm": 13.438904929323614,
|
|
"learning_rate": 9.93748187854049e-06,
|
|
"loss": 2.883826732635498,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.4375,
|
|
"grad_norm": 26.15623990553852,
|
|
"learning_rate": 9.936999199480854e-06,
|
|
"loss": 3.016101837158203,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.4380252100840336,
|
|
"grad_norm": 12.732848530423388,
|
|
"learning_rate": 9.936514676100049e-06,
|
|
"loss": 2.8473546504974365,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.4385504201680672,
|
|
"grad_norm": 10.413089643503326,
|
|
"learning_rate": 9.936028308579083e-06,
|
|
"loss": 2.980785369873047,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.43907563025210083,
|
|
"grad_norm": 8.278903410838087,
|
|
"learning_rate": 9.935540097099645e-06,
|
|
"loss": 2.812281847000122,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.43960084033613445,
|
|
"grad_norm": 14.231041555841777,
|
|
"learning_rate": 9.935050041844121e-06,
|
|
"loss": 3.268686532974243,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.44012605042016806,
|
|
"grad_norm": 11.174600937877548,
|
|
"learning_rate": 9.934558142995577e-06,
|
|
"loss": 2.3432607650756836,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.44065126050420167,
|
|
"grad_norm": 10.860728364169903,
|
|
"learning_rate": 9.934064400737776e-06,
|
|
"loss": 1.6175007820129395,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.4411764705882353,
|
|
"grad_norm": 8.8522305019082,
|
|
"learning_rate": 9.933568815255161e-06,
|
|
"loss": 2.0455827713012695,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.4417016806722689,
|
|
"grad_norm": 19.059749722910492,
|
|
"learning_rate": 9.933071386732874e-06,
|
|
"loss": 2.358410358428955,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.4422268907563025,
|
|
"grad_norm": 14.716117524810409,
|
|
"learning_rate": 9.932572115356738e-06,
|
|
"loss": 2.637843370437622,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.4427521008403361,
|
|
"grad_norm": 18.457289432595132,
|
|
"learning_rate": 9.932071001313265e-06,
|
|
"loss": 3.0861763954162598,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.4432773109243697,
|
|
"grad_norm": 21.047348762353046,
|
|
"learning_rate": 9.931568044789661e-06,
|
|
"loss": 3.025874137878418,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.44380252100840334,
|
|
"grad_norm": 8.402545229270334,
|
|
"learning_rate": 9.931063245973812e-06,
|
|
"loss": 1.9306225776672363,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.44432773109243695,
|
|
"grad_norm": 10.116635921119325,
|
|
"learning_rate": 9.930556605054295e-06,
|
|
"loss": 2.9862217903137207,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.44485294117647056,
|
|
"grad_norm": 5.691637581095164,
|
|
"learning_rate": 9.93004812222038e-06,
|
|
"loss": 2.0409066677093506,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.44537815126050423,
|
|
"grad_norm": 20.385980345273836,
|
|
"learning_rate": 9.929537797662022e-06,
|
|
"loss": 2.8674979209899902,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.44590336134453784,
|
|
"grad_norm": 24.592665709309212,
|
|
"learning_rate": 9.929025631569864e-06,
|
|
"loss": 3.1542892456054688,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.44642857142857145,
|
|
"grad_norm": 10.457336827215615,
|
|
"learning_rate": 9.928511624135233e-06,
|
|
"loss": 2.3662195205688477,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.44695378151260506,
|
|
"grad_norm": 16.156344686329113,
|
|
"learning_rate": 9.927995775550148e-06,
|
|
"loss": 2.897277355194092,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.4474789915966387,
|
|
"grad_norm": 7.458575180777569,
|
|
"learning_rate": 9.927478086007316e-06,
|
|
"loss": 3.179873466491699,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.4480042016806723,
|
|
"grad_norm": 10.618063286275696,
|
|
"learning_rate": 9.926958555700134e-06,
|
|
"loss": 3.3985543251037598,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.4485294117647059,
|
|
"grad_norm": 25.100942160328817,
|
|
"learning_rate": 9.926437184822679e-06,
|
|
"loss": 2.3599376678466797,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.4490546218487395,
|
|
"grad_norm": 6.834921407287759,
|
|
"learning_rate": 9.925913973569724e-06,
|
|
"loss": 2.82316517829895,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.4495798319327731,
|
|
"grad_norm": 23.26517440469458,
|
|
"learning_rate": 9.925388922136723e-06,
|
|
"loss": 2.540208339691162,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.45010504201680673,
|
|
"grad_norm": 23.625214809457713,
|
|
"learning_rate": 9.924862030719821e-06,
|
|
"loss": 3.0775671005249023,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.45063025210084034,
|
|
"grad_norm": 12.594553707506398,
|
|
"learning_rate": 9.924333299515849e-06,
|
|
"loss": 2.802736759185791,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.45115546218487396,
|
|
"grad_norm": 13.37962919758712,
|
|
"learning_rate": 9.923802728722326e-06,
|
|
"loss": 2.6349904537200928,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.45168067226890757,
|
|
"grad_norm": 9.76251844426648,
|
|
"learning_rate": 9.92327031853746e-06,
|
|
"loss": 2.773221015930176,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.4522058823529412,
|
|
"grad_norm": 8.272212081755562,
|
|
"learning_rate": 9.922736069160141e-06,
|
|
"loss": 2.677144765853882,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.4527310924369748,
|
|
"grad_norm": 21.64902996299887,
|
|
"learning_rate": 9.922199980789953e-06,
|
|
"loss": 2.4768295288085938,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.4532563025210084,
|
|
"grad_norm": 14.574329775895738,
|
|
"learning_rate": 9.92166205362716e-06,
|
|
"loss": 3.221784830093384,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.453781512605042,
|
|
"grad_norm": 14.831081804998556,
|
|
"learning_rate": 9.921122287872715e-06,
|
|
"loss": 2.8046839237213135,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.4543067226890756,
|
|
"grad_norm": 15.585592308183534,
|
|
"learning_rate": 9.920580683728263e-06,
|
|
"loss": 2.464376926422119,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.45483193277310924,
|
|
"grad_norm": 8.324442680101022,
|
|
"learning_rate": 9.920037241396129e-06,
|
|
"loss": 2.3783860206604004,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.45535714285714285,
|
|
"grad_norm": 8.676053686038793,
|
|
"learning_rate": 9.91949196107933e-06,
|
|
"loss": 2.8128809928894043,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.45588235294117646,
|
|
"grad_norm": 19.311087301833243,
|
|
"learning_rate": 9.918944842981564e-06,
|
|
"loss": 2.9722740650177,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.4564075630252101,
|
|
"grad_norm": 18.79648430901269,
|
|
"learning_rate": 9.918395887307219e-06,
|
|
"loss": 2.4842166900634766,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.4569327731092437,
|
|
"grad_norm": 7.373131879784598,
|
|
"learning_rate": 9.917845094261372e-06,
|
|
"loss": 2.549220561981201,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.4574579831932773,
|
|
"grad_norm": 16.88136657369639,
|
|
"learning_rate": 9.91729246404978e-06,
|
|
"loss": 2.625797748565674,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.4579831932773109,
|
|
"grad_norm": 14.363476422275891,
|
|
"learning_rate": 9.916737996878894e-06,
|
|
"loss": 2.4376330375671387,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.4585084033613445,
|
|
"grad_norm": 13.177614705966699,
|
|
"learning_rate": 9.916181692955841e-06,
|
|
"loss": 2.9835891723632812,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.45903361344537813,
|
|
"grad_norm": 14.72919808589754,
|
|
"learning_rate": 9.915623552488448e-06,
|
|
"loss": 2.9650497436523438,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.45955882352941174,
|
|
"grad_norm": 18.542956700150686,
|
|
"learning_rate": 9.915063575685212e-06,
|
|
"loss": 2.6515233516693115,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.46008403361344535,
|
|
"grad_norm": 22.222519275286896,
|
|
"learning_rate": 9.914501762755328e-06,
|
|
"loss": 3.233870506286621,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.46060924369747897,
|
|
"grad_norm": 11.454153364063668,
|
|
"learning_rate": 9.913938113908675e-06,
|
|
"loss": 3.0363383293151855,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.46113445378151263,
|
|
"grad_norm": 14.735364528868368,
|
|
"learning_rate": 9.913372629355814e-06,
|
|
"loss": 2.7695579528808594,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.46165966386554624,
|
|
"grad_norm": 81.93776974235087,
|
|
"learning_rate": 9.912805309307994e-06,
|
|
"loss": 3.1072168350219727,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.46218487394957986,
|
|
"grad_norm": 15.513658898749222,
|
|
"learning_rate": 9.912236153977149e-06,
|
|
"loss": 2.9209001064300537,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.46271008403361347,
|
|
"grad_norm": 10.31433168176148,
|
|
"learning_rate": 9.9116651635759e-06,
|
|
"loss": 2.876655101776123,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.4632352941176471,
|
|
"grad_norm": 9.705145505130226,
|
|
"learning_rate": 9.911092338317552e-06,
|
|
"loss": 2.7037758827209473,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.4637605042016807,
|
|
"grad_norm": 11.63673939883521,
|
|
"learning_rate": 9.910517678416097e-06,
|
|
"loss": 3.012422561645508,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.4642857142857143,
|
|
"grad_norm": 11.117643209724529,
|
|
"learning_rate": 9.90994118408621e-06,
|
|
"loss": 2.6030240058898926,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.4648109243697479,
|
|
"grad_norm": 12.829617835999944,
|
|
"learning_rate": 9.909362855543253e-06,
|
|
"loss": 2.7769675254821777,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.4653361344537815,
|
|
"grad_norm": 11.180701627990615,
|
|
"learning_rate": 9.908782693003273e-06,
|
|
"loss": 2.954176425933838,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.46586134453781514,
|
|
"grad_norm": 14.934695274123843,
|
|
"learning_rate": 9.908200696683004e-06,
|
|
"loss": 1.9919966459274292,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.46638655462184875,
|
|
"grad_norm": 18.64002973237268,
|
|
"learning_rate": 9.907616866799862e-06,
|
|
"loss": 2.660306692123413,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.46691176470588236,
|
|
"grad_norm": 8.698588008261643,
|
|
"learning_rate": 9.907031203571948e-06,
|
|
"loss": 2.636436939239502,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.46743697478991597,
|
|
"grad_norm": 12.701214516915318,
|
|
"learning_rate": 9.90644370721805e-06,
|
|
"loss": 3.0882530212402344,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.4679621848739496,
|
|
"grad_norm": 17.167677117516885,
|
|
"learning_rate": 9.90585437795764e-06,
|
|
"loss": 2.685439348220825,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.4684873949579832,
|
|
"grad_norm": 9.862494171319884,
|
|
"learning_rate": 9.905263216010873e-06,
|
|
"loss": 3.0536980628967285,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.4690126050420168,
|
|
"grad_norm": 28.063467363917898,
|
|
"learning_rate": 9.90467022159859e-06,
|
|
"loss": 2.772045612335205,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.4695378151260504,
|
|
"grad_norm": 13.730481186958547,
|
|
"learning_rate": 9.904075394942319e-06,
|
|
"loss": 2.8109469413757324,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.47006302521008403,
|
|
"grad_norm": 10.458735326832665,
|
|
"learning_rate": 9.90347873626427e-06,
|
|
"loss": 3.1038899421691895,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.47058823529411764,
|
|
"grad_norm": 8.942485794096514,
|
|
"learning_rate": 9.902880245787335e-06,
|
|
"loss": 2.6819446086883545,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.47111344537815125,
|
|
"grad_norm": 15.133420182005043,
|
|
"learning_rate": 9.902279923735093e-06,
|
|
"loss": 2.5943500995635986,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.47163865546218486,
|
|
"grad_norm": 44.287818347999504,
|
|
"learning_rate": 9.90167777033181e-06,
|
|
"loss": 2.56186580657959,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.4721638655462185,
|
|
"grad_norm": 12.179010723789636,
|
|
"learning_rate": 9.901073785802433e-06,
|
|
"loss": 2.842773914337158,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.4726890756302521,
|
|
"grad_norm": 10.910211922784503,
|
|
"learning_rate": 9.90046797037259e-06,
|
|
"loss": 2.7502799034118652,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.4732142857142857,
|
|
"grad_norm": 12.247715820306853,
|
|
"learning_rate": 9.899860324268599e-06,
|
|
"loss": 2.9904348850250244,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.4737394957983193,
|
|
"grad_norm": 8.397186831988368,
|
|
"learning_rate": 9.899250847717458e-06,
|
|
"loss": 2.300837993621826,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.4742647058823529,
|
|
"grad_norm": 6.757791574547933,
|
|
"learning_rate": 9.89863954094685e-06,
|
|
"loss": 3.03016996383667,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 0.47478991596638653,
|
|
"grad_norm": 11.149010584995784,
|
|
"learning_rate": 9.898026404185142e-06,
|
|
"loss": 3.3881657123565674,
|
|
"step": 904
|
|
},
|
|
{
|
|
"epoch": 0.47531512605042014,
|
|
"grad_norm": 9.926001688009785,
|
|
"learning_rate": 9.897411437661386e-06,
|
|
"loss": 3.2900993824005127,
|
|
"step": 905
|
|
},
|
|
{
|
|
"epoch": 0.47584033613445376,
|
|
"grad_norm": 14.484467714997923,
|
|
"learning_rate": 9.896794641605311e-06,
|
|
"loss": 2.8626081943511963,
|
|
"step": 906
|
|
},
|
|
{
|
|
"epoch": 0.47636554621848737,
|
|
"grad_norm": 8.178069070434272,
|
|
"learning_rate": 9.89617601624734e-06,
|
|
"loss": 2.818047046661377,
|
|
"step": 907
|
|
},
|
|
{
|
|
"epoch": 0.47689075630252103,
|
|
"grad_norm": 10.1232775019755,
|
|
"learning_rate": 9.89555556181857e-06,
|
|
"loss": 2.754782199859619,
|
|
"step": 908
|
|
},
|
|
{
|
|
"epoch": 0.47741596638655465,
|
|
"grad_norm": 37.59288764613423,
|
|
"learning_rate": 9.894933278550785e-06,
|
|
"loss": 2.760875940322876,
|
|
"step": 909
|
|
},
|
|
{
|
|
"epoch": 0.47794117647058826,
|
|
"grad_norm": 8.362808010440565,
|
|
"learning_rate": 9.894309166676454e-06,
|
|
"loss": 3.0588538646698,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.47846638655462187,
|
|
"grad_norm": 11.040059976714263,
|
|
"learning_rate": 9.893683226428727e-06,
|
|
"loss": 2.7186901569366455,
|
|
"step": 911
|
|
},
|
|
{
|
|
"epoch": 0.4789915966386555,
|
|
"grad_norm": 15.884369116169045,
|
|
"learning_rate": 9.893055458041435e-06,
|
|
"loss": 2.453359365463257,
|
|
"step": 912
|
|
},
|
|
{
|
|
"epoch": 0.4795168067226891,
|
|
"grad_norm": 11.150449224857894,
|
|
"learning_rate": 9.8924258617491e-06,
|
|
"loss": 2.9110960960388184,
|
|
"step": 913
|
|
},
|
|
{
|
|
"epoch": 0.4800420168067227,
|
|
"grad_norm": 9.861777410746715,
|
|
"learning_rate": 9.891794437786913e-06,
|
|
"loss": 2.450448513031006,
|
|
"step": 914
|
|
},
|
|
{
|
|
"epoch": 0.4805672268907563,
|
|
"grad_norm": 8.704856714943997,
|
|
"learning_rate": 9.891161186390762e-06,
|
|
"loss": 2.253525733947754,
|
|
"step": 915
|
|
},
|
|
{
|
|
"epoch": 0.4810924369747899,
|
|
"grad_norm": 11.210249714468798,
|
|
"learning_rate": 9.89052610779721e-06,
|
|
"loss": 2.6373140811920166,
|
|
"step": 916
|
|
},
|
|
{
|
|
"epoch": 0.48161764705882354,
|
|
"grad_norm": 11.331411616830994,
|
|
"learning_rate": 9.8898892022435e-06,
|
|
"loss": 2.8883016109466553,
|
|
"step": 917
|
|
},
|
|
{
|
|
"epoch": 0.48214285714285715,
|
|
"grad_norm": 14.169413016326956,
|
|
"learning_rate": 9.88925046996757e-06,
|
|
"loss": 2.789289951324463,
|
|
"step": 918
|
|
},
|
|
{
|
|
"epoch": 0.48266806722689076,
|
|
"grad_norm": 9.823475437104284,
|
|
"learning_rate": 9.888609911208024e-06,
|
|
"loss": 2.451218605041504,
|
|
"step": 919
|
|
},
|
|
{
|
|
"epoch": 0.4831932773109244,
|
|
"grad_norm": 13.071774200371857,
|
|
"learning_rate": 9.88796752620416e-06,
|
|
"loss": 2.9068379402160645,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.483718487394958,
|
|
"grad_norm": 7.228222229862132,
|
|
"learning_rate": 9.887323315195956e-06,
|
|
"loss": 2.5686075687408447,
|
|
"step": 921
|
|
},
|
|
{
|
|
"epoch": 0.4842436974789916,
|
|
"grad_norm": 13.97857355451438,
|
|
"learning_rate": 9.886677278424067e-06,
|
|
"loss": 3.3692898750305176,
|
|
"step": 922
|
|
},
|
|
{
|
|
"epoch": 0.4847689075630252,
|
|
"grad_norm": 13.189205905556747,
|
|
"learning_rate": 9.886029416129837e-06,
|
|
"loss": 2.647993803024292,
|
|
"step": 923
|
|
},
|
|
{
|
|
"epoch": 0.4852941176470588,
|
|
"grad_norm": 9.959531959726037,
|
|
"learning_rate": 9.885379728555287e-06,
|
|
"loss": 2.748720645904541,
|
|
"step": 924
|
|
},
|
|
{
|
|
"epoch": 0.48581932773109243,
|
|
"grad_norm": 8.566845023372595,
|
|
"learning_rate": 9.884728215943122e-06,
|
|
"loss": 2.3783226013183594,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.48634453781512604,
|
|
"grad_norm": 10.393397255084187,
|
|
"learning_rate": 9.88407487853673e-06,
|
|
"loss": 2.561636209487915,
|
|
"step": 926
|
|
},
|
|
{
|
|
"epoch": 0.48686974789915966,
|
|
"grad_norm": 6.2649523285799615,
|
|
"learning_rate": 9.883419716580176e-06,
|
|
"loss": 3.1514012813568115,
|
|
"step": 927
|
|
},
|
|
{
|
|
"epoch": 0.48739495798319327,
|
|
"grad_norm": 10.114420618847973,
|
|
"learning_rate": 9.882762730318211e-06,
|
|
"loss": 2.6970512866973877,
|
|
"step": 928
|
|
},
|
|
{
|
|
"epoch": 0.4879201680672269,
|
|
"grad_norm": 8.933523608286832,
|
|
"learning_rate": 9.882103919996268e-06,
|
|
"loss": 3.23095703125,
|
|
"step": 929
|
|
},
|
|
{
|
|
"epoch": 0.4884453781512605,
|
|
"grad_norm": 7.397712401835694,
|
|
"learning_rate": 9.88144328586046e-06,
|
|
"loss": 2.8616130352020264,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.4889705882352941,
|
|
"grad_norm": 8.29616801424702,
|
|
"learning_rate": 9.880780828157574e-06,
|
|
"loss": 3.153644561767578,
|
|
"step": 931
|
|
},
|
|
{
|
|
"epoch": 0.4894957983193277,
|
|
"grad_norm": 9.896148542582118,
|
|
"learning_rate": 9.880116547135094e-06,
|
|
"loss": 2.85158109664917,
|
|
"step": 932
|
|
},
|
|
{
|
|
"epoch": 0.4900210084033613,
|
|
"grad_norm": 9.7373498009189,
|
|
"learning_rate": 9.879450443041172e-06,
|
|
"loss": 2.8021767139434814,
|
|
"step": 933
|
|
},
|
|
{
|
|
"epoch": 0.49054621848739494,
|
|
"grad_norm": 18.346372110792238,
|
|
"learning_rate": 9.878782516124646e-06,
|
|
"loss": 2.8704347610473633,
|
|
"step": 934
|
|
},
|
|
{
|
|
"epoch": 0.49107142857142855,
|
|
"grad_norm": 10.048721317398819,
|
|
"learning_rate": 9.878112766635035e-06,
|
|
"loss": 2.824406385421753,
|
|
"step": 935
|
|
},
|
|
{
|
|
"epoch": 0.49159663865546216,
|
|
"grad_norm": 12.81702897470756,
|
|
"learning_rate": 9.877441194822537e-06,
|
|
"loss": 3.4565329551696777,
|
|
"step": 936
|
|
},
|
|
{
|
|
"epoch": 0.49212184873949577,
|
|
"grad_norm": 22.2970212401919,
|
|
"learning_rate": 9.876767800938032e-06,
|
|
"loss": 2.3091888427734375,
|
|
"step": 937
|
|
},
|
|
{
|
|
"epoch": 0.49264705882352944,
|
|
"grad_norm": 10.793653974784883,
|
|
"learning_rate": 9.876092585233081e-06,
|
|
"loss": 3.0196175575256348,
|
|
"step": 938
|
|
},
|
|
{
|
|
"epoch": 0.49317226890756305,
|
|
"grad_norm": 10.708288001889315,
|
|
"learning_rate": 9.875415547959926e-06,
|
|
"loss": 2.4891157150268555,
|
|
"step": 939
|
|
},
|
|
{
|
|
"epoch": 0.49369747899159666,
|
|
"grad_norm": 8.340641456156122,
|
|
"learning_rate": 9.874736689371487e-06,
|
|
"loss": 2.7188596725463867,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.4942226890756303,
|
|
"grad_norm": 15.558688033349009,
|
|
"learning_rate": 9.874056009721367e-06,
|
|
"loss": 2.205338478088379,
|
|
"step": 941
|
|
},
|
|
{
|
|
"epoch": 0.4947478991596639,
|
|
"grad_norm": 14.595148331349147,
|
|
"learning_rate": 9.87337350926385e-06,
|
|
"loss": 3.069061040878296,
|
|
"step": 942
|
|
},
|
|
{
|
|
"epoch": 0.4952731092436975,
|
|
"grad_norm": 10.50970053280589,
|
|
"learning_rate": 9.872689188253895e-06,
|
|
"loss": 3.2438502311706543,
|
|
"step": 943
|
|
},
|
|
{
|
|
"epoch": 0.4957983193277311,
|
|
"grad_norm": 11.852469887801579,
|
|
"learning_rate": 9.872003046947148e-06,
|
|
"loss": 2.4861674308776855,
|
|
"step": 944
|
|
},
|
|
{
|
|
"epoch": 0.4963235294117647,
|
|
"grad_norm": 12.881604766503324,
|
|
"learning_rate": 9.87131508559993e-06,
|
|
"loss": 2.827747344970703,
|
|
"step": 945
|
|
},
|
|
{
|
|
"epoch": 0.49684873949579833,
|
|
"grad_norm": 31.509644437963562,
|
|
"learning_rate": 9.870625304469244e-06,
|
|
"loss": 3.669290065765381,
|
|
"step": 946
|
|
},
|
|
{
|
|
"epoch": 0.49737394957983194,
|
|
"grad_norm": 11.243025059684005,
|
|
"learning_rate": 9.869933703812773e-06,
|
|
"loss": 2.615156650543213,
|
|
"step": 947
|
|
},
|
|
{
|
|
"epoch": 0.49789915966386555,
|
|
"grad_norm": 10.604615572148322,
|
|
"learning_rate": 9.869240283888879e-06,
|
|
"loss": 3.173630475997925,
|
|
"step": 948
|
|
},
|
|
{
|
|
"epoch": 0.49842436974789917,
|
|
"grad_norm": 9.452663488404353,
|
|
"learning_rate": 9.868545044956603e-06,
|
|
"loss": 2.46535062789917,
|
|
"step": 949
|
|
},
|
|
{
|
|
"epoch": 0.4989495798319328,
|
|
"grad_norm": 14.578447019798483,
|
|
"learning_rate": 9.867847987275665e-06,
|
|
"loss": 2.3685851097106934,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.4994747899159664,
|
|
"grad_norm": 16.015486062482058,
|
|
"learning_rate": 9.867149111106469e-06,
|
|
"loss": 3.4084792137145996,
|
|
"step": 951
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 8.172727766916283,
|
|
"learning_rate": 9.866448416710094e-06,
|
|
"loss": 2.634758472442627,
|
|
"step": 952
|
|
},
|
|
{
|
|
"epoch": 0.5005252100840336,
|
|
"grad_norm": 7.061421839836941,
|
|
"learning_rate": 9.865745904348296e-06,
|
|
"loss": 3.1642332077026367,
|
|
"step": 953
|
|
},
|
|
{
|
|
"epoch": 0.5010504201680672,
|
|
"grad_norm": 7.863379368881913,
|
|
"learning_rate": 9.865041574283519e-06,
|
|
"loss": 2.55232310295105,
|
|
"step": 954
|
|
},
|
|
{
|
|
"epoch": 0.5015756302521008,
|
|
"grad_norm": 18.318706645727392,
|
|
"learning_rate": 9.864335426778877e-06,
|
|
"loss": 3.0581870079040527,
|
|
"step": 955
|
|
},
|
|
{
|
|
"epoch": 0.5021008403361344,
|
|
"grad_norm": 8.24819656304432,
|
|
"learning_rate": 9.863627462098166e-06,
|
|
"loss": 3.321889638900757,
|
|
"step": 956
|
|
},
|
|
{
|
|
"epoch": 0.5026260504201681,
|
|
"grad_norm": 13.940058316546246,
|
|
"learning_rate": 9.862917680505863e-06,
|
|
"loss": 2.7758915424346924,
|
|
"step": 957
|
|
},
|
|
{
|
|
"epoch": 0.5031512605042017,
|
|
"grad_norm": 7.790005951709683,
|
|
"learning_rate": 9.862206082267123e-06,
|
|
"loss": 2.8979663848876953,
|
|
"step": 958
|
|
},
|
|
{
|
|
"epoch": 0.5036764705882353,
|
|
"grad_norm": 31.609464745133295,
|
|
"learning_rate": 9.861492667647776e-06,
|
|
"loss": 3.6580424308776855,
|
|
"step": 959
|
|
},
|
|
{
|
|
"epoch": 0.5042016806722689,
|
|
"grad_norm": 8.822434156970964,
|
|
"learning_rate": 9.860777436914334e-06,
|
|
"loss": 2.9936790466308594,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.5047268907563025,
|
|
"grad_norm": 12.867974272732651,
|
|
"learning_rate": 9.860060390333988e-06,
|
|
"loss": 2.7880330085754395,
|
|
"step": 961
|
|
},
|
|
{
|
|
"epoch": 0.5052521008403361,
|
|
"grad_norm": 10.290823503263832,
|
|
"learning_rate": 9.859341528174604e-06,
|
|
"loss": 2.471447467803955,
|
|
"step": 962
|
|
},
|
|
{
|
|
"epoch": 0.5057773109243697,
|
|
"grad_norm": 12.483830449845785,
|
|
"learning_rate": 9.85862085070473e-06,
|
|
"loss": 3.259432792663574,
|
|
"step": 963
|
|
},
|
|
{
|
|
"epoch": 0.5063025210084033,
|
|
"grad_norm": 14.628462597437963,
|
|
"learning_rate": 9.85789835819359e-06,
|
|
"loss": 2.4535512924194336,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 0.506827731092437,
|
|
"grad_norm": 7.249075888465973,
|
|
"learning_rate": 9.857174050911085e-06,
|
|
"loss": 2.0006537437438965,
|
|
"step": 965
|
|
},
|
|
{
|
|
"epoch": 0.5073529411764706,
|
|
"grad_norm": 7.472949235353495,
|
|
"learning_rate": 9.856447929127797e-06,
|
|
"loss": 1.8998805284500122,
|
|
"step": 966
|
|
},
|
|
{
|
|
"epoch": 0.5078781512605042,
|
|
"grad_norm": 40.11983502167645,
|
|
"learning_rate": 9.855719993114983e-06,
|
|
"loss": 2.6687722206115723,
|
|
"step": 967
|
|
},
|
|
{
|
|
"epoch": 0.5084033613445378,
|
|
"grad_norm": 7.657658631890199,
|
|
"learning_rate": 9.854990243144583e-06,
|
|
"loss": 1.9068760871887207,
|
|
"step": 968
|
|
},
|
|
{
|
|
"epoch": 0.5089285714285714,
|
|
"grad_norm": 10.387516235717962,
|
|
"learning_rate": 9.854258679489203e-06,
|
|
"loss": 3.62998104095459,
|
|
"step": 969
|
|
},
|
|
{
|
|
"epoch": 0.509453781512605,
|
|
"grad_norm": 17.102277052214465,
|
|
"learning_rate": 9.853525302422143e-06,
|
|
"loss": 3.2284932136535645,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.5099789915966386,
|
|
"grad_norm": 13.344277208761309,
|
|
"learning_rate": 9.852790112217364e-06,
|
|
"loss": 2.899284839630127,
|
|
"step": 971
|
|
},
|
|
{
|
|
"epoch": 0.5105042016806722,
|
|
"grad_norm": 10.892894759474302,
|
|
"learning_rate": 9.852053109149518e-06,
|
|
"loss": 2.663513660430908,
|
|
"step": 972
|
|
},
|
|
{
|
|
"epoch": 0.5110294117647058,
|
|
"grad_norm": 14.239491096036769,
|
|
"learning_rate": 9.851314293493923e-06,
|
|
"loss": 2.841151237487793,
|
|
"step": 973
|
|
},
|
|
{
|
|
"epoch": 0.5115546218487395,
|
|
"grad_norm": 9.049345457055445,
|
|
"learning_rate": 9.850573665526583e-06,
|
|
"loss": 3.1498963832855225,
|
|
"step": 974
|
|
},
|
|
{
|
|
"epoch": 0.5120798319327731,
|
|
"grad_norm": 24.137043144127464,
|
|
"learning_rate": 9.849831225524174e-06,
|
|
"loss": 2.4141013622283936,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.5126050420168067,
|
|
"grad_norm": 12.293035374600452,
|
|
"learning_rate": 9.849086973764049e-06,
|
|
"loss": 2.7569313049316406,
|
|
"step": 976
|
|
},
|
|
{
|
|
"epoch": 0.5131302521008403,
|
|
"grad_norm": 11.148606706292894,
|
|
"learning_rate": 9.848340910524243e-06,
|
|
"loss": 2.7346744537353516,
|
|
"step": 977
|
|
},
|
|
{
|
|
"epoch": 0.5136554621848739,
|
|
"grad_norm": 11.90459404326738,
|
|
"learning_rate": 9.847593036083457e-06,
|
|
"loss": 2.7461304664611816,
|
|
"step": 978
|
|
},
|
|
{
|
|
"epoch": 0.5141806722689075,
|
|
"grad_norm": 13.455479167498735,
|
|
"learning_rate": 9.846843350721084e-06,
|
|
"loss": 2.8733315467834473,
|
|
"step": 979
|
|
},
|
|
{
|
|
"epoch": 0.5147058823529411,
|
|
"grad_norm": 14.47955515122071,
|
|
"learning_rate": 9.846091854717179e-06,
|
|
"loss": 3.1680872440338135,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.5152310924369747,
|
|
"grad_norm": 17.18200775081459,
|
|
"learning_rate": 9.845338548352482e-06,
|
|
"loss": 2.253500461578369,
|
|
"step": 981
|
|
},
|
|
{
|
|
"epoch": 0.5157563025210085,
|
|
"grad_norm": 7.010827004001508,
|
|
"learning_rate": 9.844583431908404e-06,
|
|
"loss": 2.8670248985290527,
|
|
"step": 982
|
|
},
|
|
{
|
|
"epoch": 0.5162815126050421,
|
|
"grad_norm": 11.724281288596448,
|
|
"learning_rate": 9.843826505667038e-06,
|
|
"loss": 2.728477954864502,
|
|
"step": 983
|
|
},
|
|
{
|
|
"epoch": 0.5168067226890757,
|
|
"grad_norm": 7.890004234599675,
|
|
"learning_rate": 9.843067769911147e-06,
|
|
"loss": 2.5966458320617676,
|
|
"step": 984
|
|
},
|
|
{
|
|
"epoch": 0.5173319327731093,
|
|
"grad_norm": 9.694420979855991,
|
|
"learning_rate": 9.842307224924174e-06,
|
|
"loss": 2.716907024383545,
|
|
"step": 985
|
|
},
|
|
{
|
|
"epoch": 0.5178571428571429,
|
|
"grad_norm": 12.392850667138623,
|
|
"learning_rate": 9.841544870990237e-06,
|
|
"loss": 2.506190776824951,
|
|
"step": 986
|
|
},
|
|
{
|
|
"epoch": 0.5183823529411765,
|
|
"grad_norm": 9.699664556874543,
|
|
"learning_rate": 9.840780708394131e-06,
|
|
"loss": 3.579655647277832,
|
|
"step": 987
|
|
},
|
|
{
|
|
"epoch": 0.5189075630252101,
|
|
"grad_norm": 11.971835575602299,
|
|
"learning_rate": 9.840014737421321e-06,
|
|
"loss": 2.9419727325439453,
|
|
"step": 988
|
|
},
|
|
{
|
|
"epoch": 0.5194327731092437,
|
|
"grad_norm": 11.32733302234166,
|
|
"learning_rate": 9.839246958357957e-06,
|
|
"loss": 2.8499889373779297,
|
|
"step": 989
|
|
},
|
|
{
|
|
"epoch": 0.5199579831932774,
|
|
"grad_norm": 6.737626396843377,
|
|
"learning_rate": 9.838477371490857e-06,
|
|
"loss": 2.7310805320739746,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.520483193277311,
|
|
"grad_norm": 7.7832175497025755,
|
|
"learning_rate": 9.837705977107514e-06,
|
|
"loss": 2.371565103530884,
|
|
"step": 991
|
|
},
|
|
{
|
|
"epoch": 0.5210084033613446,
|
|
"grad_norm": 13.423359517757728,
|
|
"learning_rate": 9.836932775496102e-06,
|
|
"loss": 2.1075167655944824,
|
|
"step": 992
|
|
},
|
|
{
|
|
"epoch": 0.5215336134453782,
|
|
"grad_norm": 30.355785767864948,
|
|
"learning_rate": 9.836157766945467e-06,
|
|
"loss": 2.9195847511291504,
|
|
"step": 993
|
|
},
|
|
{
|
|
"epoch": 0.5220588235294118,
|
|
"grad_norm": 5.9085648950482215,
|
|
"learning_rate": 9.835380951745128e-06,
|
|
"loss": 2.653118133544922,
|
|
"step": 994
|
|
},
|
|
{
|
|
"epoch": 0.5225840336134454,
|
|
"grad_norm": 8.624755576924365,
|
|
"learning_rate": 9.834602330185282e-06,
|
|
"loss": 2.7378313541412354,
|
|
"step": 995
|
|
},
|
|
{
|
|
"epoch": 0.523109243697479,
|
|
"grad_norm": 15.447229177233407,
|
|
"learning_rate": 9.833821902556799e-06,
|
|
"loss": 3.0610413551330566,
|
|
"step": 996
|
|
},
|
|
{
|
|
"epoch": 0.5236344537815126,
|
|
"grad_norm": 14.138344909614812,
|
|
"learning_rate": 9.833039669151225e-06,
|
|
"loss": 2.086059331893921,
|
|
"step": 997
|
|
},
|
|
{
|
|
"epoch": 0.5241596638655462,
|
|
"grad_norm": 25.891742154338104,
|
|
"learning_rate": 9.832255630260781e-06,
|
|
"loss": 2.698242664337158,
|
|
"step": 998
|
|
},
|
|
{
|
|
"epoch": 0.5246848739495799,
|
|
"grad_norm": 13.534664830891156,
|
|
"learning_rate": 9.831469786178359e-06,
|
|
"loss": 2.078580141067505,
|
|
"step": 999
|
|
},
|
|
{
|
|
"epoch": 0.5252100840336135,
|
|
"grad_norm": 20.311726552736538,
|
|
"learning_rate": 9.83068213719753e-06,
|
|
"loss": 3.3323657512664795,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.5257352941176471,
|
|
"grad_norm": 10.316669873142736,
|
|
"learning_rate": 9.829892683612535e-06,
|
|
"loss": 3.5711441040039062,
|
|
"step": 1001
|
|
},
|
|
{
|
|
"epoch": 0.5262605042016807,
|
|
"grad_norm": 11.07522520228112,
|
|
"learning_rate": 9.829101425718293e-06,
|
|
"loss": 3.0521323680877686,
|
|
"step": 1002
|
|
},
|
|
{
|
|
"epoch": 0.5267857142857143,
|
|
"grad_norm": 10.12534774251354,
|
|
"learning_rate": 9.828308363810392e-06,
|
|
"loss": 3.1976470947265625,
|
|
"step": 1003
|
|
},
|
|
{
|
|
"epoch": 0.5273109243697479,
|
|
"grad_norm": 11.155283266128201,
|
|
"learning_rate": 9.827513498185102e-06,
|
|
"loss": 2.9771289825439453,
|
|
"step": 1004
|
|
},
|
|
{
|
|
"epoch": 0.5278361344537815,
|
|
"grad_norm": 10.864857422412713,
|
|
"learning_rate": 9.826716829139358e-06,
|
|
"loss": 1.943270206451416,
|
|
"step": 1005
|
|
},
|
|
{
|
|
"epoch": 0.5283613445378151,
|
|
"grad_norm": 10.85085375472981,
|
|
"learning_rate": 9.825918356970776e-06,
|
|
"loss": 3.0275015830993652,
|
|
"step": 1006
|
|
},
|
|
{
|
|
"epoch": 0.5288865546218487,
|
|
"grad_norm": 18.464630613651543,
|
|
"learning_rate": 9.82511808197764e-06,
|
|
"loss": 2.402885913848877,
|
|
"step": 1007
|
|
},
|
|
{
|
|
"epoch": 0.5294117647058824,
|
|
"grad_norm": 12.385173244701438,
|
|
"learning_rate": 9.82431600445891e-06,
|
|
"loss": 3.268373966217041,
|
|
"step": 1008
|
|
},
|
|
{
|
|
"epoch": 0.529936974789916,
|
|
"grad_norm": 12.78405094599589,
|
|
"learning_rate": 9.82351212471422e-06,
|
|
"loss": 3.336167335510254,
|
|
"step": 1009
|
|
},
|
|
{
|
|
"epoch": 0.5304621848739496,
|
|
"grad_norm": 13.404236320599205,
|
|
"learning_rate": 9.822706443043874e-06,
|
|
"loss": 2.69339656829834,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.5309873949579832,
|
|
"grad_norm": 5.971076613892638,
|
|
"learning_rate": 9.821898959748856e-06,
|
|
"loss": 2.8977434635162354,
|
|
"step": 1011
|
|
},
|
|
{
|
|
"epoch": 0.5315126050420168,
|
|
"grad_norm": 28.995985825273255,
|
|
"learning_rate": 9.821089675130816e-06,
|
|
"loss": 2.4791910648345947,
|
|
"step": 1012
|
|
},
|
|
{
|
|
"epoch": 0.5320378151260504,
|
|
"grad_norm": 10.501767999262777,
|
|
"learning_rate": 9.820278589492076e-06,
|
|
"loss": 2.9573850631713867,
|
|
"step": 1013
|
|
},
|
|
{
|
|
"epoch": 0.532563025210084,
|
|
"grad_norm": 16.06805414193256,
|
|
"learning_rate": 9.81946570313564e-06,
|
|
"loss": 2.753512144088745,
|
|
"step": 1014
|
|
},
|
|
{
|
|
"epoch": 0.5330882352941176,
|
|
"grad_norm": 18.288090694705243,
|
|
"learning_rate": 9.818651016365177e-06,
|
|
"loss": 2.611023426055908,
|
|
"step": 1015
|
|
},
|
|
{
|
|
"epoch": 0.5336134453781513,
|
|
"grad_norm": 9.715589897537523,
|
|
"learning_rate": 9.817834529485031e-06,
|
|
"loss": 3.0187296867370605,
|
|
"step": 1016
|
|
},
|
|
{
|
|
"epoch": 0.5341386554621849,
|
|
"grad_norm": 11.483477923856451,
|
|
"learning_rate": 9.817016242800215e-06,
|
|
"loss": 2.56632661819458,
|
|
"step": 1017
|
|
},
|
|
{
|
|
"epoch": 0.5346638655462185,
|
|
"grad_norm": 6.972928016485257,
|
|
"learning_rate": 9.816196156616422e-06,
|
|
"loss": 2.8569421768188477,
|
|
"step": 1018
|
|
},
|
|
{
|
|
"epoch": 0.5351890756302521,
|
|
"grad_norm": 23.941456471416537,
|
|
"learning_rate": 9.81537427124001e-06,
|
|
"loss": 3.0288453102111816,
|
|
"step": 1019
|
|
},
|
|
{
|
|
"epoch": 0.5357142857142857,
|
|
"grad_norm": 10.271680924637222,
|
|
"learning_rate": 9.814550586978012e-06,
|
|
"loss": 2.467142105102539,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.5362394957983193,
|
|
"grad_norm": 29.999809621685696,
|
|
"learning_rate": 9.813725104138133e-06,
|
|
"loss": 3.313124179840088,
|
|
"step": 1021
|
|
},
|
|
{
|
|
"epoch": 0.5367647058823529,
|
|
"grad_norm": 11.858343928359286,
|
|
"learning_rate": 9.81289782302875e-06,
|
|
"loss": 2.8632054328918457,
|
|
"step": 1022
|
|
},
|
|
{
|
|
"epoch": 0.5372899159663865,
|
|
"grad_norm": 8.625622958347869,
|
|
"learning_rate": 9.812068743958912e-06,
|
|
"loss": 3.371345281600952,
|
|
"step": 1023
|
|
},
|
|
{
|
|
"epoch": 0.5378151260504201,
|
|
"grad_norm": 13.456332343689956,
|
|
"learning_rate": 9.811237867238337e-06,
|
|
"loss": 2.4938735961914062,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 0.5383403361344538,
|
|
"grad_norm": 13.887856457216152,
|
|
"learning_rate": 9.810405193177418e-06,
|
|
"loss": 2.6703624725341797,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.5388655462184874,
|
|
"grad_norm": 7.315954892490282,
|
|
"learning_rate": 9.809570722087219e-06,
|
|
"loss": 2.005222797393799,
|
|
"step": 1026
|
|
},
|
|
{
|
|
"epoch": 0.539390756302521,
|
|
"grad_norm": 32.4494701372808,
|
|
"learning_rate": 9.808734454279473e-06,
|
|
"loss": 1.9920377731323242,
|
|
"step": 1027
|
|
},
|
|
{
|
|
"epoch": 0.5399159663865546,
|
|
"grad_norm": 4.742357389219866,
|
|
"learning_rate": 9.807896390066587e-06,
|
|
"loss": 2.665032386779785,
|
|
"step": 1028
|
|
},
|
|
{
|
|
"epoch": 0.5404411764705882,
|
|
"grad_norm": 9.890452547582996,
|
|
"learning_rate": 9.807056529761637e-06,
|
|
"loss": 2.693915605545044,
|
|
"step": 1029
|
|
},
|
|
{
|
|
"epoch": 0.5409663865546218,
|
|
"grad_norm": 15.832264264465673,
|
|
"learning_rate": 9.80621487367837e-06,
|
|
"loss": 3.056929588317871,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.5414915966386554,
|
|
"grad_norm": 7.848495756271129,
|
|
"learning_rate": 9.805371422131205e-06,
|
|
"loss": 3.023015022277832,
|
|
"step": 1031
|
|
},
|
|
{
|
|
"epoch": 0.542016806722689,
|
|
"grad_norm": 11.169805960673793,
|
|
"learning_rate": 9.804526175435231e-06,
|
|
"loss": 2.842832088470459,
|
|
"step": 1032
|
|
},
|
|
{
|
|
"epoch": 0.5425420168067226,
|
|
"grad_norm": 12.867254489345834,
|
|
"learning_rate": 9.80367913390621e-06,
|
|
"loss": 2.984562873840332,
|
|
"step": 1033
|
|
},
|
|
{
|
|
"epoch": 0.5430672268907563,
|
|
"grad_norm": 11.364377695454339,
|
|
"learning_rate": 9.80283029786057e-06,
|
|
"loss": 3.140024185180664,
|
|
"step": 1034
|
|
},
|
|
{
|
|
"epoch": 0.5435924369747899,
|
|
"grad_norm": 12.253614600348552,
|
|
"learning_rate": 9.801979667615414e-06,
|
|
"loss": 2.9512271881103516,
|
|
"step": 1035
|
|
},
|
|
{
|
|
"epoch": 0.5441176470588235,
|
|
"grad_norm": 9.566896336471828,
|
|
"learning_rate": 9.80112724348851e-06,
|
|
"loss": 2.9276838302612305,
|
|
"step": 1036
|
|
},
|
|
{
|
|
"epoch": 0.5446428571428571,
|
|
"grad_norm": 8.150817738952002,
|
|
"learning_rate": 9.800273025798302e-06,
|
|
"loss": 2.9314827919006348,
|
|
"step": 1037
|
|
},
|
|
{
|
|
"epoch": 0.5451680672268907,
|
|
"grad_norm": 8.378956214234197,
|
|
"learning_rate": 9.7994170148639e-06,
|
|
"loss": 2.8263697624206543,
|
|
"step": 1038
|
|
},
|
|
{
|
|
"epoch": 0.5456932773109243,
|
|
"grad_norm": 12.837581687837934,
|
|
"learning_rate": 9.798559211005083e-06,
|
|
"loss": 2.652360439300537,
|
|
"step": 1039
|
|
},
|
|
{
|
|
"epoch": 0.5462184873949579,
|
|
"grad_norm": 17.339944801121778,
|
|
"learning_rate": 9.797699614542307e-06,
|
|
"loss": 2.6018738746643066,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.5467436974789915,
|
|
"grad_norm": 9.574990718351845,
|
|
"learning_rate": 9.796838225796688e-06,
|
|
"loss": 2.670292615890503,
|
|
"step": 1041
|
|
},
|
|
{
|
|
"epoch": 0.5472689075630253,
|
|
"grad_norm": 11.442446768722633,
|
|
"learning_rate": 9.795975045090017e-06,
|
|
"loss": 2.682929039001465,
|
|
"step": 1042
|
|
},
|
|
{
|
|
"epoch": 0.5477941176470589,
|
|
"grad_norm": 6.5451412317335835,
|
|
"learning_rate": 9.795110072744756e-06,
|
|
"loss": 2.639051914215088,
|
|
"step": 1043
|
|
},
|
|
{
|
|
"epoch": 0.5483193277310925,
|
|
"grad_norm": 6.994297772162765,
|
|
"learning_rate": 9.794243309084031e-06,
|
|
"loss": 2.4831275939941406,
|
|
"step": 1044
|
|
},
|
|
{
|
|
"epoch": 0.5488445378151261,
|
|
"grad_norm": 10.901868740772803,
|
|
"learning_rate": 9.793374754431642e-06,
|
|
"loss": 3.0070412158966064,
|
|
"step": 1045
|
|
},
|
|
{
|
|
"epoch": 0.5493697478991597,
|
|
"grad_norm": 9.049053594948145,
|
|
"learning_rate": 9.792504409112054e-06,
|
|
"loss": 2.586428642272949,
|
|
"step": 1046
|
|
},
|
|
{
|
|
"epoch": 0.5498949579831933,
|
|
"grad_norm": 10.556684856379588,
|
|
"learning_rate": 9.791632273450405e-06,
|
|
"loss": 2.67769193649292,
|
|
"step": 1047
|
|
},
|
|
{
|
|
"epoch": 0.5504201680672269,
|
|
"grad_norm": 12.770715147815533,
|
|
"learning_rate": 9.790758347772498e-06,
|
|
"loss": 2.2092323303222656,
|
|
"step": 1048
|
|
},
|
|
{
|
|
"epoch": 0.5509453781512605,
|
|
"grad_norm": 13.465225428274353,
|
|
"learning_rate": 9.789882632404809e-06,
|
|
"loss": 2.8107967376708984,
|
|
"step": 1049
|
|
},
|
|
{
|
|
"epoch": 0.5514705882352942,
|
|
"grad_norm": 13.452802113852654,
|
|
"learning_rate": 9.789005127674478e-06,
|
|
"loss": 3.391347885131836,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.5519957983193278,
|
|
"grad_norm": 8.405309040460729,
|
|
"learning_rate": 9.788125833909316e-06,
|
|
"loss": 2.8124945163726807,
|
|
"step": 1051
|
|
},
|
|
{
|
|
"epoch": 0.5525210084033614,
|
|
"grad_norm": 13.498104235942153,
|
|
"learning_rate": 9.787244751437802e-06,
|
|
"loss": 2.5720763206481934,
|
|
"step": 1052
|
|
},
|
|
{
|
|
"epoch": 0.553046218487395,
|
|
"grad_norm": 8.942492315347012,
|
|
"learning_rate": 9.786361880589084e-06,
|
|
"loss": 3.0965919494628906,
|
|
"step": 1053
|
|
},
|
|
{
|
|
"epoch": 0.5535714285714286,
|
|
"grad_norm": 6.713967963971811,
|
|
"learning_rate": 9.785477221692976e-06,
|
|
"loss": 2.4715917110443115,
|
|
"step": 1054
|
|
},
|
|
{
|
|
"epoch": 0.5540966386554622,
|
|
"grad_norm": 19.302702706185897,
|
|
"learning_rate": 9.784590775079964e-06,
|
|
"loss": 3.064605951309204,
|
|
"step": 1055
|
|
},
|
|
{
|
|
"epoch": 0.5546218487394958,
|
|
"grad_norm": 12.641159395376391,
|
|
"learning_rate": 9.783702541081192e-06,
|
|
"loss": 2.564366102218628,
|
|
"step": 1056
|
|
},
|
|
{
|
|
"epoch": 0.5551470588235294,
|
|
"grad_norm": 11.001270774413294,
|
|
"learning_rate": 9.782812520028487e-06,
|
|
"loss": 2.6874303817749023,
|
|
"step": 1057
|
|
},
|
|
{
|
|
"epoch": 0.555672268907563,
|
|
"grad_norm": 11.818495269271782,
|
|
"learning_rate": 9.781920712254332e-06,
|
|
"loss": 2.3377063274383545,
|
|
"step": 1058
|
|
},
|
|
{
|
|
"epoch": 0.5561974789915967,
|
|
"grad_norm": 9.477767430911584,
|
|
"learning_rate": 9.781027118091879e-06,
|
|
"loss": 2.503865957260132,
|
|
"step": 1059
|
|
},
|
|
{
|
|
"epoch": 0.5567226890756303,
|
|
"grad_norm": 13.254389673708763,
|
|
"learning_rate": 9.780131737874949e-06,
|
|
"loss": 2.5261001586914062,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.5572478991596639,
|
|
"grad_norm": 7.470005500306703,
|
|
"learning_rate": 9.779234571938034e-06,
|
|
"loss": 2.468996047973633,
|
|
"step": 1061
|
|
},
|
|
{
|
|
"epoch": 0.5577731092436975,
|
|
"grad_norm": 13.319679733182621,
|
|
"learning_rate": 9.778335620616289e-06,
|
|
"loss": 2.8894853591918945,
|
|
"step": 1062
|
|
},
|
|
{
|
|
"epoch": 0.5582983193277311,
|
|
"grad_norm": 11.54918471756888,
|
|
"learning_rate": 9.777434884245533e-06,
|
|
"loss": 2.8393735885620117,
|
|
"step": 1063
|
|
},
|
|
{
|
|
"epoch": 0.5588235294117647,
|
|
"grad_norm": 12.172219585570327,
|
|
"learning_rate": 9.776532363162257e-06,
|
|
"loss": 2.831796407699585,
|
|
"step": 1064
|
|
},
|
|
{
|
|
"epoch": 0.5593487394957983,
|
|
"grad_norm": 9.415239467747737,
|
|
"learning_rate": 9.775628057703616e-06,
|
|
"loss": 2.703184127807617,
|
|
"step": 1065
|
|
},
|
|
{
|
|
"epoch": 0.5598739495798319,
|
|
"grad_norm": 11.61775890406432,
|
|
"learning_rate": 9.774721968207437e-06,
|
|
"loss": 2.5092737674713135,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 0.5603991596638656,
|
|
"grad_norm": 10.469216639388009,
|
|
"learning_rate": 9.773814095012202e-06,
|
|
"loss": 2.348267078399658,
|
|
"step": 1067
|
|
},
|
|
{
|
|
"epoch": 0.5609243697478992,
|
|
"grad_norm": 9.0730502251458,
|
|
"learning_rate": 9.772904438457071e-06,
|
|
"loss": 2.873717784881592,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 0.5614495798319328,
|
|
"grad_norm": 8.565259361577203,
|
|
"learning_rate": 9.771992998881865e-06,
|
|
"loss": 2.824530839920044,
|
|
"step": 1069
|
|
},
|
|
{
|
|
"epoch": 0.5619747899159664,
|
|
"grad_norm": 19.17277434009704,
|
|
"learning_rate": 9.771079776627072e-06,
|
|
"loss": 3.1064677238464355,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.5625,
|
|
"grad_norm": 8.140366855205347,
|
|
"learning_rate": 9.770164772033845e-06,
|
|
"loss": 2.7749385833740234,
|
|
"step": 1071
|
|
},
|
|
{
|
|
"epoch": 0.5630252100840336,
|
|
"grad_norm": 13.198742671609411,
|
|
"learning_rate": 9.769247985444002e-06,
|
|
"loss": 3.2205395698547363,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 0.5635504201680672,
|
|
"grad_norm": 8.367019081966903,
|
|
"learning_rate": 9.768329417200029e-06,
|
|
"loss": 2.4534449577331543,
|
|
"step": 1073
|
|
},
|
|
{
|
|
"epoch": 0.5640756302521008,
|
|
"grad_norm": 7.681181918194523,
|
|
"learning_rate": 9.767409067645078e-06,
|
|
"loss": 2.577038526535034,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 0.5646008403361344,
|
|
"grad_norm": 17.178070210175466,
|
|
"learning_rate": 9.766486937122964e-06,
|
|
"loss": 2.671081066131592,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.5651260504201681,
|
|
"grad_norm": 9.812669714608955,
|
|
"learning_rate": 9.765563025978169e-06,
|
|
"loss": 2.4747838973999023,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 0.5656512605042017,
|
|
"grad_norm": 12.844645992485237,
|
|
"learning_rate": 9.76463733455584e-06,
|
|
"loss": 2.7280406951904297,
|
|
"step": 1077
|
|
},
|
|
{
|
|
"epoch": 0.5661764705882353,
|
|
"grad_norm": 20.60693930535657,
|
|
"learning_rate": 9.763709863201789e-06,
|
|
"loss": 2.888597011566162,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 0.5667016806722689,
|
|
"grad_norm": 22.08845641204313,
|
|
"learning_rate": 9.76278061226249e-06,
|
|
"loss": 2.7614669799804688,
|
|
"step": 1079
|
|
},
|
|
{
|
|
"epoch": 0.5672268907563025,
|
|
"grad_norm": 7.83779863883514,
|
|
"learning_rate": 9.761849582085086e-06,
|
|
"loss": 3.2007977962493896,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.5677521008403361,
|
|
"grad_norm": 8.731312539445284,
|
|
"learning_rate": 9.760916773017386e-06,
|
|
"loss": 2.5763912200927734,
|
|
"step": 1081
|
|
},
|
|
{
|
|
"epoch": 0.5682773109243697,
|
|
"grad_norm": 13.329280642009538,
|
|
"learning_rate": 9.759982185407855e-06,
|
|
"loss": 2.9799554347991943,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 0.5688025210084033,
|
|
"grad_norm": 9.73542764189317,
|
|
"learning_rate": 9.759045819605635e-06,
|
|
"loss": 2.7777011394500732,
|
|
"step": 1083
|
|
},
|
|
{
|
|
"epoch": 0.569327731092437,
|
|
"grad_norm": 15.11561949994785,
|
|
"learning_rate": 9.758107675960518e-06,
|
|
"loss": 3.3915722370147705,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 0.5698529411764706,
|
|
"grad_norm": 8.70761881829688,
|
|
"learning_rate": 9.757167754822974e-06,
|
|
"loss": 2.9978628158569336,
|
|
"step": 1085
|
|
},
|
|
{
|
|
"epoch": 0.5703781512605042,
|
|
"grad_norm": 8.89129308068863,
|
|
"learning_rate": 9.756226056544128e-06,
|
|
"loss": 3.0361058712005615,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 0.5709033613445378,
|
|
"grad_norm": 11.70241312709555,
|
|
"learning_rate": 9.755282581475769e-06,
|
|
"loss": 2.65017032623291,
|
|
"step": 1087
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"grad_norm": 16.74924117531719,
|
|
"learning_rate": 9.754337329970355e-06,
|
|
"loss": 2.61814284324646,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 0.571953781512605,
|
|
"grad_norm": 9.32895799787225,
|
|
"learning_rate": 9.753390302381006e-06,
|
|
"loss": 2.870701789855957,
|
|
"step": 1089
|
|
},
|
|
{
|
|
"epoch": 0.5724789915966386,
|
|
"grad_norm": 22.462449181708536,
|
|
"learning_rate": 9.7524414990615e-06,
|
|
"loss": 4.07295036315918,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.5730042016806722,
|
|
"grad_norm": 18.906745516318605,
|
|
"learning_rate": 9.751490920366287e-06,
|
|
"loss": 3.0024361610412598,
|
|
"step": 1091
|
|
},
|
|
{
|
|
"epoch": 0.5735294117647058,
|
|
"grad_norm": 10.263059841388436,
|
|
"learning_rate": 9.750538566650474e-06,
|
|
"loss": 2.908892869949341,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 0.5740546218487395,
|
|
"grad_norm": 18.951927624919907,
|
|
"learning_rate": 9.749584438269833e-06,
|
|
"loss": 2.9717984199523926,
|
|
"step": 1093
|
|
},
|
|
{
|
|
"epoch": 0.5745798319327731,
|
|
"grad_norm": 8.213729091608988,
|
|
"learning_rate": 9.748628535580798e-06,
|
|
"loss": 2.613170623779297,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 0.5751050420168067,
|
|
"grad_norm": 11.704280389856635,
|
|
"learning_rate": 9.747670858940468e-06,
|
|
"loss": 2.078094959259033,
|
|
"step": 1095
|
|
},
|
|
{
|
|
"epoch": 0.5756302521008403,
|
|
"grad_norm": 8.876822687488534,
|
|
"learning_rate": 9.746711408706607e-06,
|
|
"loss": 1.7200992107391357,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 0.5761554621848739,
|
|
"grad_norm": 9.230253639578661,
|
|
"learning_rate": 9.74575018523763e-06,
|
|
"loss": 2.741290330886841,
|
|
"step": 1097
|
|
},
|
|
{
|
|
"epoch": 0.5766806722689075,
|
|
"grad_norm": 12.412888033861083,
|
|
"learning_rate": 9.74478718889263e-06,
|
|
"loss": 2.882063865661621,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 0.5772058823529411,
|
|
"grad_norm": 9.543578956177342,
|
|
"learning_rate": 9.74382242003135e-06,
|
|
"loss": 2.2171170711517334,
|
|
"step": 1099
|
|
},
|
|
{
|
|
"epoch": 0.5777310924369747,
|
|
"grad_norm": 15.084306947965192,
|
|
"learning_rate": 9.742855879014202e-06,
|
|
"loss": 2.855663776397705,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.5782563025210085,
|
|
"grad_norm": 14.23142601301457,
|
|
"learning_rate": 9.741887566202259e-06,
|
|
"loss": 2.897360324859619,
|
|
"step": 1101
|
|
},
|
|
{
|
|
"epoch": 0.5787815126050421,
|
|
"grad_norm": 13.059435415100035,
|
|
"learning_rate": 9.740917481957253e-06,
|
|
"loss": 3.0059051513671875,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 0.5793067226890757,
|
|
"grad_norm": 8.932422966842017,
|
|
"learning_rate": 9.739945626641579e-06,
|
|
"loss": 2.6640849113464355,
|
|
"step": 1103
|
|
},
|
|
{
|
|
"epoch": 0.5798319327731093,
|
|
"grad_norm": 9.796607570591627,
|
|
"learning_rate": 9.738972000618296e-06,
|
|
"loss": 2.855018377304077,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 0.5803571428571429,
|
|
"grad_norm": 16.558214916245234,
|
|
"learning_rate": 9.737996604251124e-06,
|
|
"loss": 2.8122780323028564,
|
|
"step": 1105
|
|
},
|
|
{
|
|
"epoch": 0.5808823529411765,
|
|
"grad_norm": 11.418000282461028,
|
|
"learning_rate": 9.737019437904438e-06,
|
|
"loss": 2.5800719261169434,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 0.5814075630252101,
|
|
"grad_norm": 8.156757069327533,
|
|
"learning_rate": 9.736040501943285e-06,
|
|
"loss": 2.724315643310547,
|
|
"step": 1107
|
|
},
|
|
{
|
|
"epoch": 0.5819327731092437,
|
|
"grad_norm": 17.815625375197854,
|
|
"learning_rate": 9.735059796733363e-06,
|
|
"loss": 3.6219096183776855,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 0.5824579831932774,
|
|
"grad_norm": 10.501857298537288,
|
|
"learning_rate": 9.73407732264104e-06,
|
|
"loss": 3.04555082321167,
|
|
"step": 1109
|
|
},
|
|
{
|
|
"epoch": 0.582983193277311,
|
|
"grad_norm": 9.555867042957924,
|
|
"learning_rate": 9.733093080033335e-06,
|
|
"loss": 2.9941024780273438,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.5835084033613446,
|
|
"grad_norm": 11.498471043031197,
|
|
"learning_rate": 9.732107069277936e-06,
|
|
"loss": 2.79038667678833,
|
|
"step": 1111
|
|
},
|
|
{
|
|
"epoch": 0.5840336134453782,
|
|
"grad_norm": 18.205145944991067,
|
|
"learning_rate": 9.731119290743188e-06,
|
|
"loss": 2.674743413925171,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 0.5845588235294118,
|
|
"grad_norm": 8.555916624666907,
|
|
"learning_rate": 9.730129744798096e-06,
|
|
"loss": 2.9587817192077637,
|
|
"step": 1113
|
|
},
|
|
{
|
|
"epoch": 0.5850840336134454,
|
|
"grad_norm": 8.748068827463866,
|
|
"learning_rate": 9.729138431812327e-06,
|
|
"loss": 2.8857421875,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 0.585609243697479,
|
|
"grad_norm": 7.666558845996576,
|
|
"learning_rate": 9.728145352156203e-06,
|
|
"loss": 2.5033679008483887,
|
|
"step": 1115
|
|
},
|
|
{
|
|
"epoch": 0.5861344537815126,
|
|
"grad_norm": 20.32998550651072,
|
|
"learning_rate": 9.727150506200715e-06,
|
|
"loss": 3.6410534381866455,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 0.5866596638655462,
|
|
"grad_norm": 7.739201012658456,
|
|
"learning_rate": 9.726153894317508e-06,
|
|
"loss": 2.701643943786621,
|
|
"step": 1117
|
|
},
|
|
{
|
|
"epoch": 0.5871848739495799,
|
|
"grad_norm": 11.367614124351507,
|
|
"learning_rate": 9.725155516878886e-06,
|
|
"loss": 3.276360273361206,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 0.5877100840336135,
|
|
"grad_norm": 11.862499535452493,
|
|
"learning_rate": 9.724155374257816e-06,
|
|
"loss": 2.570164203643799,
|
|
"step": 1119
|
|
},
|
|
{
|
|
"epoch": 0.5882352941176471,
|
|
"grad_norm": 13.119065243909068,
|
|
"learning_rate": 9.72315346682792e-06,
|
|
"loss": 2.958350419998169,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.5887605042016807,
|
|
"grad_norm": 19.464185695283074,
|
|
"learning_rate": 9.722149794963483e-06,
|
|
"loss": 2.5229337215423584,
|
|
"step": 1121
|
|
},
|
|
{
|
|
"epoch": 0.5892857142857143,
|
|
"grad_norm": 8.71824757303977,
|
|
"learning_rate": 9.721144359039448e-06,
|
|
"loss": 2.857860565185547,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 0.5898109243697479,
|
|
"grad_norm": 7.191404205099183,
|
|
"learning_rate": 9.720137159431418e-06,
|
|
"loss": 3.283879518508911,
|
|
"step": 1123
|
|
},
|
|
{
|
|
"epoch": 0.5903361344537815,
|
|
"grad_norm": 10.53641421236998,
|
|
"learning_rate": 9.719128196515653e-06,
|
|
"loss": 2.851640224456787,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 0.5908613445378151,
|
|
"grad_norm": 13.396472099026342,
|
|
"learning_rate": 9.718117470669072e-06,
|
|
"loss": 3.0612597465515137,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.5913865546218487,
|
|
"grad_norm": 10.271472877546353,
|
|
"learning_rate": 9.717104982269257e-06,
|
|
"loss": 2.798976421356201,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 0.5919117647058824,
|
|
"grad_norm": 12.100564214276803,
|
|
"learning_rate": 9.716090731694439e-06,
|
|
"loss": 2.4026825428009033,
|
|
"step": 1127
|
|
},
|
|
{
|
|
"epoch": 0.592436974789916,
|
|
"grad_norm": 16.904155491225104,
|
|
"learning_rate": 9.715074719323515e-06,
|
|
"loss": 3.1392836570739746,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 0.5929621848739496,
|
|
"grad_norm": 12.195628748562735,
|
|
"learning_rate": 9.714056945536039e-06,
|
|
"loss": 2.194385051727295,
|
|
"step": 1129
|
|
},
|
|
{
|
|
"epoch": 0.5934873949579832,
|
|
"grad_norm": 9.206581903261434,
|
|
"learning_rate": 9.713037410712222e-06,
|
|
"loss": 2.4214446544647217,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.5940126050420168,
|
|
"grad_norm": 12.966888511805234,
|
|
"learning_rate": 9.712016115232932e-06,
|
|
"loss": 2.4819564819335938,
|
|
"step": 1131
|
|
},
|
|
{
|
|
"epoch": 0.5945378151260504,
|
|
"grad_norm": 9.888297193550352,
|
|
"learning_rate": 9.710993059479695e-06,
|
|
"loss": 3.1289210319519043,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 0.595063025210084,
|
|
"grad_norm": 10.95708527935894,
|
|
"learning_rate": 9.709968243834698e-06,
|
|
"loss": 2.821739673614502,
|
|
"step": 1133
|
|
},
|
|
{
|
|
"epoch": 0.5955882352941176,
|
|
"grad_norm": 13.524970991193987,
|
|
"learning_rate": 9.70894166868078e-06,
|
|
"loss": 3.322603702545166,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 0.5961134453781513,
|
|
"grad_norm": 8.123611701582872,
|
|
"learning_rate": 9.707913334401441e-06,
|
|
"loss": 2.6867642402648926,
|
|
"step": 1135
|
|
},
|
|
{
|
|
"epoch": 0.5966386554621849,
|
|
"grad_norm": 37.803610433950304,
|
|
"learning_rate": 9.706883241380838e-06,
|
|
"loss": 2.36342191696167,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 0.5971638655462185,
|
|
"grad_norm": 9.012080624683833,
|
|
"learning_rate": 9.705851390003783e-06,
|
|
"loss": 2.456573486328125,
|
|
"step": 1137
|
|
},
|
|
{
|
|
"epoch": 0.5976890756302521,
|
|
"grad_norm": 8.45889112860806,
|
|
"learning_rate": 9.704817780655746e-06,
|
|
"loss": 2.8405826091766357,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 0.5982142857142857,
|
|
"grad_norm": 15.385597854811314,
|
|
"learning_rate": 9.703782413722856e-06,
|
|
"loss": 2.8452396392822266,
|
|
"step": 1139
|
|
},
|
|
{
|
|
"epoch": 0.5987394957983193,
|
|
"grad_norm": 12.92550188250134,
|
|
"learning_rate": 9.702745289591892e-06,
|
|
"loss": 2.7421834468841553,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.5992647058823529,
|
|
"grad_norm": 12.738073299096635,
|
|
"learning_rate": 9.7017064086503e-06,
|
|
"loss": 3.2678005695343018,
|
|
"step": 1141
|
|
},
|
|
{
|
|
"epoch": 0.5997899159663865,
|
|
"grad_norm": 8.289371569526935,
|
|
"learning_rate": 9.70066577128617e-06,
|
|
"loss": 2.658457040786743,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 0.6003151260504201,
|
|
"grad_norm": 10.833496630831867,
|
|
"learning_rate": 9.699623377888256e-06,
|
|
"loss": 2.4036011695861816,
|
|
"step": 1143
|
|
},
|
|
{
|
|
"epoch": 0.6008403361344538,
|
|
"grad_norm": 15.492760361146278,
|
|
"learning_rate": 9.69857922884597e-06,
|
|
"loss": 2.4981842041015625,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 0.6013655462184874,
|
|
"grad_norm": 12.11767186572831,
|
|
"learning_rate": 9.697533324549371e-06,
|
|
"loss": 3.1888647079467773,
|
|
"step": 1145
|
|
},
|
|
{
|
|
"epoch": 0.601890756302521,
|
|
"grad_norm": 10.573838040176359,
|
|
"learning_rate": 9.69648566538918e-06,
|
|
"loss": 2.7450263500213623,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 0.6024159663865546,
|
|
"grad_norm": 22.53615825568267,
|
|
"learning_rate": 9.695436251756775e-06,
|
|
"loss": 3.4375998973846436,
|
|
"step": 1147
|
|
},
|
|
{
|
|
"epoch": 0.6029411764705882,
|
|
"grad_norm": 13.678486235698054,
|
|
"learning_rate": 9.694385084044185e-06,
|
|
"loss": 3.148430347442627,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 0.6034663865546218,
|
|
"grad_norm": 15.516844451611417,
|
|
"learning_rate": 9.693332162644095e-06,
|
|
"loss": 2.8116562366485596,
|
|
"step": 1149
|
|
},
|
|
{
|
|
"epoch": 0.6039915966386554,
|
|
"grad_norm": 9.826042722686092,
|
|
"learning_rate": 9.692277487949849e-06,
|
|
"loss": 2.690570831298828,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.604516806722689,
|
|
"grad_norm": 13.307863596273979,
|
|
"learning_rate": 9.69122106035544e-06,
|
|
"loss": 2.812605857849121,
|
|
"step": 1151
|
|
},
|
|
{
|
|
"epoch": 0.6050420168067226,
|
|
"grad_norm": 11.440540259437942,
|
|
"learning_rate": 9.690162880255521e-06,
|
|
"loss": 3.1070003509521484,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 0.6055672268907563,
|
|
"grad_norm": 12.259735929652518,
|
|
"learning_rate": 9.689102948045398e-06,
|
|
"loss": 3.474421501159668,
|
|
"step": 1153
|
|
},
|
|
{
|
|
"epoch": 0.6060924369747899,
|
|
"grad_norm": 10.129940184206724,
|
|
"learning_rate": 9.688041264121031e-06,
|
|
"loss": 3.1540329456329346,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 0.6066176470588235,
|
|
"grad_norm": 10.913585193843712,
|
|
"learning_rate": 9.686977828879033e-06,
|
|
"loss": 2.887948989868164,
|
|
"step": 1155
|
|
},
|
|
{
|
|
"epoch": 0.6071428571428571,
|
|
"grad_norm": 13.432164188694037,
|
|
"learning_rate": 9.685912642716674e-06,
|
|
"loss": 2.8469412326812744,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 0.6076680672268907,
|
|
"grad_norm": 11.19226800007551,
|
|
"learning_rate": 9.684845706031878e-06,
|
|
"loss": 2.606762170791626,
|
|
"step": 1157
|
|
},
|
|
{
|
|
"epoch": 0.6081932773109243,
|
|
"grad_norm": 7.666606187379233,
|
|
"learning_rate": 9.683777019223219e-06,
|
|
"loss": 2.2800896167755127,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 0.6087184873949579,
|
|
"grad_norm": 8.199448460822813,
|
|
"learning_rate": 9.682706582689932e-06,
|
|
"loss": 2.4749555587768555,
|
|
"step": 1159
|
|
},
|
|
{
|
|
"epoch": 0.6092436974789915,
|
|
"grad_norm": 13.320939746045067,
|
|
"learning_rate": 9.681634396831899e-06,
|
|
"loss": 3.058583974838257,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.6097689075630253,
|
|
"grad_norm": 7.818142107419255,
|
|
"learning_rate": 9.680560462049657e-06,
|
|
"loss": 2.5522444248199463,
|
|
"step": 1161
|
|
},
|
|
{
|
|
"epoch": 0.6102941176470589,
|
|
"grad_norm": 15.02939473129863,
|
|
"learning_rate": 9.679484778744396e-06,
|
|
"loss": 2.9182796478271484,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 0.6108193277310925,
|
|
"grad_norm": 6.127233982969524,
|
|
"learning_rate": 9.678407347317967e-06,
|
|
"loss": 2.725381374359131,
|
|
"step": 1163
|
|
},
|
|
{
|
|
"epoch": 0.6113445378151261,
|
|
"grad_norm": 20.265398877815926,
|
|
"learning_rate": 9.677328168172858e-06,
|
|
"loss": 3.0166091918945312,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 0.6118697478991597,
|
|
"grad_norm": 12.918754190555548,
|
|
"learning_rate": 9.676247241712228e-06,
|
|
"loss": 2.6854474544525146,
|
|
"step": 1165
|
|
},
|
|
{
|
|
"epoch": 0.6123949579831933,
|
|
"grad_norm": 12.011574482145152,
|
|
"learning_rate": 9.675164568339875e-06,
|
|
"loss": 2.832428455352783,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 0.6129201680672269,
|
|
"grad_norm": 16.585008813925967,
|
|
"learning_rate": 9.674080148460257e-06,
|
|
"loss": 3.083036422729492,
|
|
"step": 1167
|
|
},
|
|
{
|
|
"epoch": 0.6134453781512605,
|
|
"grad_norm": 33.33992251292681,
|
|
"learning_rate": 9.67299398247848e-06,
|
|
"loss": 3.9661097526550293,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 0.6139705882352942,
|
|
"grad_norm": 10.746932335764692,
|
|
"learning_rate": 9.671906070800307e-06,
|
|
"loss": 2.783630132675171,
|
|
"step": 1169
|
|
},
|
|
{
|
|
"epoch": 0.6144957983193278,
|
|
"grad_norm": 9.840503537133605,
|
|
"learning_rate": 9.670816413832147e-06,
|
|
"loss": 2.8105361461639404,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.6150210084033614,
|
|
"grad_norm": 13.050816812308216,
|
|
"learning_rate": 9.669725011981068e-06,
|
|
"loss": 2.0451159477233887,
|
|
"step": 1171
|
|
},
|
|
{
|
|
"epoch": 0.615546218487395,
|
|
"grad_norm": 12.585068193664572,
|
|
"learning_rate": 9.668631865654786e-06,
|
|
"loss": 2.873997688293457,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 0.6160714285714286,
|
|
"grad_norm": 15.938353723255233,
|
|
"learning_rate": 9.667536975261667e-06,
|
|
"loss": 3.156132936477661,
|
|
"step": 1173
|
|
},
|
|
{
|
|
"epoch": 0.6165966386554622,
|
|
"grad_norm": 10.430976638419382,
|
|
"learning_rate": 9.666440341210732e-06,
|
|
"loss": 2.876246929168701,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 0.6171218487394958,
|
|
"grad_norm": 7.6978361066179115,
|
|
"learning_rate": 9.665341963911653e-06,
|
|
"loss": 3.0936636924743652,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.6176470588235294,
|
|
"grad_norm": 22.572543740555304,
|
|
"learning_rate": 9.66424184377475e-06,
|
|
"loss": 3.228053092956543,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 0.618172268907563,
|
|
"grad_norm": 8.045368216788974,
|
|
"learning_rate": 9.663139981210998e-06,
|
|
"loss": 2.4929630756378174,
|
|
"step": 1177
|
|
},
|
|
{
|
|
"epoch": 0.6186974789915967,
|
|
"grad_norm": 14.198458888679136,
|
|
"learning_rate": 9.66203637663202e-06,
|
|
"loss": 3.2256741523742676,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 0.6192226890756303,
|
|
"grad_norm": 8.97046477193514,
|
|
"learning_rate": 9.660931030450092e-06,
|
|
"loss": 2.9510498046875,
|
|
"step": 1179
|
|
},
|
|
{
|
|
"epoch": 0.6197478991596639,
|
|
"grad_norm": 8.751914637591376,
|
|
"learning_rate": 9.65982394307814e-06,
|
|
"loss": 2.904456853866577,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.6202731092436975,
|
|
"grad_norm": 6.6941921577657375,
|
|
"learning_rate": 9.658715114929737e-06,
|
|
"loss": 2.760448455810547,
|
|
"step": 1181
|
|
},
|
|
{
|
|
"epoch": 0.6207983193277311,
|
|
"grad_norm": 7.5692966304314435,
|
|
"learning_rate": 9.657604546419114e-06,
|
|
"loss": 2.975520610809326,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 0.6213235294117647,
|
|
"grad_norm": 29.173755091178357,
|
|
"learning_rate": 9.656492237961143e-06,
|
|
"loss": 1.9504122734069824,
|
|
"step": 1183
|
|
},
|
|
{
|
|
"epoch": 0.6218487394957983,
|
|
"grad_norm": 20.213219631267187,
|
|
"learning_rate": 9.655378189971354e-06,
|
|
"loss": 2.0044260025024414,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 0.6223739495798319,
|
|
"grad_norm": 10.135311570606858,
|
|
"learning_rate": 9.654262402865922e-06,
|
|
"loss": 3.064586639404297,
|
|
"step": 1185
|
|
},
|
|
{
|
|
"epoch": 0.6228991596638656,
|
|
"grad_norm": 10.221859142786899,
|
|
"learning_rate": 9.653144877061672e-06,
|
|
"loss": 2.7229180335998535,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 0.6234243697478992,
|
|
"grad_norm": 13.950082720956196,
|
|
"learning_rate": 9.652025612976082e-06,
|
|
"loss": 2.699241876602173,
|
|
"step": 1187
|
|
},
|
|
{
|
|
"epoch": 0.6239495798319328,
|
|
"grad_norm": 17.39937903729645,
|
|
"learning_rate": 9.650904611027275e-06,
|
|
"loss": 3.4351096153259277,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 0.6244747899159664,
|
|
"grad_norm": 8.560759495398628,
|
|
"learning_rate": 9.649781871634025e-06,
|
|
"loss": 2.8936972618103027,
|
|
"step": 1189
|
|
},
|
|
{
|
|
"epoch": 0.625,
|
|
"grad_norm": 11.049356382816127,
|
|
"learning_rate": 9.648657395215756e-06,
|
|
"loss": 3.0585176944732666,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.6255252100840336,
|
|
"grad_norm": 17.913817531741632,
|
|
"learning_rate": 9.647531182192542e-06,
|
|
"loss": 2.4928512573242188,
|
|
"step": 1191
|
|
},
|
|
{
|
|
"epoch": 0.6260504201680672,
|
|
"grad_norm": 10.590209884627793,
|
|
"learning_rate": 9.6464032329851e-06,
|
|
"loss": 3.0131940841674805,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 0.6265756302521008,
|
|
"grad_norm": 8.177485325467538,
|
|
"learning_rate": 9.6452735480148e-06,
|
|
"loss": 1.599998116493225,
|
|
"step": 1193
|
|
},
|
|
{
|
|
"epoch": 0.6271008403361344,
|
|
"grad_norm": 6.972074972722784,
|
|
"learning_rate": 9.644142127703662e-06,
|
|
"loss": 2.8549647331237793,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 0.6276260504201681,
|
|
"grad_norm": 7.967186455233471,
|
|
"learning_rate": 9.643008972474352e-06,
|
|
"loss": 2.789647102355957,
|
|
"step": 1195
|
|
},
|
|
{
|
|
"epoch": 0.6281512605042017,
|
|
"grad_norm": 10.401127520833763,
|
|
"learning_rate": 9.641874082750185e-06,
|
|
"loss": 2.69581937789917,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 0.6286764705882353,
|
|
"grad_norm": 11.500047151382882,
|
|
"learning_rate": 9.64073745895512e-06,
|
|
"loss": 2.8014025688171387,
|
|
"step": 1197
|
|
},
|
|
{
|
|
"epoch": 0.6292016806722689,
|
|
"grad_norm": 12.130292686014862,
|
|
"learning_rate": 9.639599101513769e-06,
|
|
"loss": 2.4452357292175293,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 0.6297268907563025,
|
|
"grad_norm": 31.67487720255507,
|
|
"learning_rate": 9.638459010851389e-06,
|
|
"loss": 2.4740800857543945,
|
|
"step": 1199
|
|
},
|
|
{
|
|
"epoch": 0.6302521008403361,
|
|
"grad_norm": 13.336000861839528,
|
|
"learning_rate": 9.637317187393885e-06,
|
|
"loss": 2.855738878250122,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.6307773109243697,
|
|
"grad_norm": 10.426998643566288,
|
|
"learning_rate": 9.636173631567812e-06,
|
|
"loss": 2.6344735622406006,
|
|
"step": 1201
|
|
},
|
|
{
|
|
"epoch": 0.6313025210084033,
|
|
"grad_norm": 6.536286840045967,
|
|
"learning_rate": 9.635028343800365e-06,
|
|
"loss": 2.8178374767303467,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 0.631827731092437,
|
|
"grad_norm": 9.08335092165054,
|
|
"learning_rate": 9.633881324519397e-06,
|
|
"loss": 2.8191640377044678,
|
|
"step": 1203
|
|
},
|
|
{
|
|
"epoch": 0.6323529411764706,
|
|
"grad_norm": 8.273507395941941,
|
|
"learning_rate": 9.632732574153393e-06,
|
|
"loss": 2.8788299560546875,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 0.6328781512605042,
|
|
"grad_norm": 8.536280407992857,
|
|
"learning_rate": 9.631582093131501e-06,
|
|
"loss": 2.449352264404297,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 0.6334033613445378,
|
|
"grad_norm": 17.05668954424723,
|
|
"learning_rate": 9.630429881883506e-06,
|
|
"loss": 2.9622299671173096,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 0.6339285714285714,
|
|
"grad_norm": 16.360472118594963,
|
|
"learning_rate": 9.629275940839838e-06,
|
|
"loss": 3.2144505977630615,
|
|
"step": 1207
|
|
},
|
|
{
|
|
"epoch": 0.634453781512605,
|
|
"grad_norm": 36.55273344134769,
|
|
"learning_rate": 9.628120270431579e-06,
|
|
"loss": 2.3319010734558105,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 0.6349789915966386,
|
|
"grad_norm": 8.306416270383187,
|
|
"learning_rate": 9.62696287109045e-06,
|
|
"loss": 2.8658809661865234,
|
|
"step": 1209
|
|
},
|
|
{
|
|
"epoch": 0.6355042016806722,
|
|
"grad_norm": 13.216888673053313,
|
|
"learning_rate": 9.625803743248828e-06,
|
|
"loss": 2.698309898376465,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.6360294117647058,
|
|
"grad_norm": 12.55219430082199,
|
|
"learning_rate": 9.624642887339726e-06,
|
|
"loss": 2.3321962356567383,
|
|
"step": 1211
|
|
},
|
|
{
|
|
"epoch": 0.6365546218487395,
|
|
"grad_norm": 13.689849334238781,
|
|
"learning_rate": 9.623480303796809e-06,
|
|
"loss": 3.1201837062835693,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 0.6370798319327731,
|
|
"grad_norm": 7.734765266763488,
|
|
"learning_rate": 9.622315993054384e-06,
|
|
"loss": 3.217038154602051,
|
|
"step": 1213
|
|
},
|
|
{
|
|
"epoch": 0.6376050420168067,
|
|
"grad_norm": 6.64830203506993,
|
|
"learning_rate": 9.621149955547401e-06,
|
|
"loss": 2.8321728706359863,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 0.6381302521008403,
|
|
"grad_norm": 9.338230711426522,
|
|
"learning_rate": 9.619982191711462e-06,
|
|
"loss": 2.633908271789551,
|
|
"step": 1215
|
|
},
|
|
{
|
|
"epoch": 0.6386554621848739,
|
|
"grad_norm": 13.63438630462051,
|
|
"learning_rate": 9.618812701982808e-06,
|
|
"loss": 3.059248924255371,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 0.6391806722689075,
|
|
"grad_norm": 14.973627433044415,
|
|
"learning_rate": 9.61764148679833e-06,
|
|
"loss": 3.526207685470581,
|
|
"step": 1217
|
|
},
|
|
{
|
|
"epoch": 0.6397058823529411,
|
|
"grad_norm": 9.72191311741727,
|
|
"learning_rate": 9.616468546595556e-06,
|
|
"loss": 2.847991466522217,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 0.6402310924369747,
|
|
"grad_norm": 9.733912205577893,
|
|
"learning_rate": 9.615293881812666e-06,
|
|
"loss": 2.928663492202759,
|
|
"step": 1219
|
|
},
|
|
{
|
|
"epoch": 0.6407563025210085,
|
|
"grad_norm": 15.627623677478192,
|
|
"learning_rate": 9.614117492888479e-06,
|
|
"loss": 3.010359525680542,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.6412815126050421,
|
|
"grad_norm": 8.65174457200257,
|
|
"learning_rate": 9.61293938026246e-06,
|
|
"loss": 2.901470184326172,
|
|
"step": 1221
|
|
},
|
|
{
|
|
"epoch": 0.6418067226890757,
|
|
"grad_norm": 23.717025681574377,
|
|
"learning_rate": 9.611759544374719e-06,
|
|
"loss": 3.327909231185913,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 0.6423319327731093,
|
|
"grad_norm": 23.457512475487516,
|
|
"learning_rate": 9.61057798566601e-06,
|
|
"loss": 2.769148588180542,
|
|
"step": 1223
|
|
},
|
|
{
|
|
"epoch": 0.6428571428571429,
|
|
"grad_norm": 11.314647776230938,
|
|
"learning_rate": 9.609394704577728e-06,
|
|
"loss": 2.668102502822876,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 0.6433823529411765,
|
|
"grad_norm": 9.855746697379743,
|
|
"learning_rate": 9.608209701551913e-06,
|
|
"loss": 2.606781482696533,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.6439075630252101,
|
|
"grad_norm": 12.64896610603209,
|
|
"learning_rate": 9.607022977031247e-06,
|
|
"loss": 2.465226173400879,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 0.6444327731092437,
|
|
"grad_norm": 12.015146340376507,
|
|
"learning_rate": 9.60583453145906e-06,
|
|
"loss": 1.7274982929229736,
|
|
"step": 1227
|
|
},
|
|
{
|
|
"epoch": 0.6449579831932774,
|
|
"grad_norm": 12.108841692254453,
|
|
"learning_rate": 9.604644365279316e-06,
|
|
"loss": 2.740081310272217,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 0.645483193277311,
|
|
"grad_norm": 10.128874048559293,
|
|
"learning_rate": 9.60345247893663e-06,
|
|
"loss": 2.892533302307129,
|
|
"step": 1229
|
|
},
|
|
{
|
|
"epoch": 0.6460084033613446,
|
|
"grad_norm": 12.230768852722859,
|
|
"learning_rate": 9.602258872876256e-06,
|
|
"loss": 2.738703727722168,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.6465336134453782,
|
|
"grad_norm": 13.988678221971234,
|
|
"learning_rate": 9.60106354754409e-06,
|
|
"loss": 2.6789562702178955,
|
|
"step": 1231
|
|
},
|
|
{
|
|
"epoch": 0.6470588235294118,
|
|
"grad_norm": 10.144174129686618,
|
|
"learning_rate": 9.599866503386673e-06,
|
|
"loss": 2.8041086196899414,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 0.6475840336134454,
|
|
"grad_norm": 11.550532444108269,
|
|
"learning_rate": 9.598667740851187e-06,
|
|
"loss": 2.9156932830810547,
|
|
"step": 1233
|
|
},
|
|
{
|
|
"epoch": 0.648109243697479,
|
|
"grad_norm": 13.964702510735874,
|
|
"learning_rate": 9.597467260385452e-06,
|
|
"loss": 2.6370129585266113,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 0.6486344537815126,
|
|
"grad_norm": 17.928296596902026,
|
|
"learning_rate": 9.596265062437933e-06,
|
|
"loss": 3.0181162357330322,
|
|
"step": 1235
|
|
},
|
|
{
|
|
"epoch": 0.6491596638655462,
|
|
"grad_norm": 7.731089233325417,
|
|
"learning_rate": 9.59506114745774e-06,
|
|
"loss": 2.9872469902038574,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 0.6496848739495799,
|
|
"grad_norm": 10.283607724998227,
|
|
"learning_rate": 9.59385551589462e-06,
|
|
"loss": 2.948763608932495,
|
|
"step": 1237
|
|
},
|
|
{
|
|
"epoch": 0.6502100840336135,
|
|
"grad_norm": 17.150723223022805,
|
|
"learning_rate": 9.592648168198961e-06,
|
|
"loss": 2.2981417179107666,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 0.6507352941176471,
|
|
"grad_norm": 20.940451908364867,
|
|
"learning_rate": 9.591439104821795e-06,
|
|
"loss": 3.1842682361602783,
|
|
"step": 1239
|
|
},
|
|
{
|
|
"epoch": 0.6512605042016807,
|
|
"grad_norm": 8.770053306727121,
|
|
"learning_rate": 9.590228326214794e-06,
|
|
"loss": 2.823775053024292,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.6517857142857143,
|
|
"grad_norm": 12.110727698299284,
|
|
"learning_rate": 9.589015832830267e-06,
|
|
"loss": 2.3811817169189453,
|
|
"step": 1241
|
|
},
|
|
{
|
|
"epoch": 0.6523109243697479,
|
|
"grad_norm": 9.106069774662254,
|
|
"learning_rate": 9.587801625121167e-06,
|
|
"loss": 2.5018489360809326,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 0.6528361344537815,
|
|
"grad_norm": 21.827303844607535,
|
|
"learning_rate": 9.586585703541092e-06,
|
|
"loss": 2.984823703765869,
|
|
"step": 1243
|
|
},
|
|
{
|
|
"epoch": 0.6533613445378151,
|
|
"grad_norm": 9.74472805235711,
|
|
"learning_rate": 9.58536806854427e-06,
|
|
"loss": 3.300760269165039,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 0.6538865546218487,
|
|
"grad_norm": 11.01056962582444,
|
|
"learning_rate": 9.584148720585575e-06,
|
|
"loss": 2.8390417098999023,
|
|
"step": 1245
|
|
},
|
|
{
|
|
"epoch": 0.6544117647058824,
|
|
"grad_norm": 8.826552371043519,
|
|
"learning_rate": 9.582927660120524e-06,
|
|
"loss": 3.0983588695526123,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 0.654936974789916,
|
|
"grad_norm": 11.04747213581895,
|
|
"learning_rate": 9.581704887605267e-06,
|
|
"loss": 3.279728889465332,
|
|
"step": 1247
|
|
},
|
|
{
|
|
"epoch": 0.6554621848739496,
|
|
"grad_norm": 17.88074046271364,
|
|
"learning_rate": 9.580480403496599e-06,
|
|
"loss": 2.770259380340576,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 0.6559873949579832,
|
|
"grad_norm": 10.30587242756858,
|
|
"learning_rate": 9.57925420825195e-06,
|
|
"loss": 2.5752410888671875,
|
|
"step": 1249
|
|
},
|
|
{
|
|
"epoch": 0.6565126050420168,
|
|
"grad_norm": 10.025829680080944,
|
|
"learning_rate": 9.578026302329391e-06,
|
|
"loss": 2.874560832977295,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.6570378151260504,
|
|
"grad_norm": 8.244483440682384,
|
|
"learning_rate": 9.576796686187635e-06,
|
|
"loss": 2.392122745513916,
|
|
"step": 1251
|
|
},
|
|
{
|
|
"epoch": 0.657563025210084,
|
|
"grad_norm": 9.694849465338885,
|
|
"learning_rate": 9.57556536028603e-06,
|
|
"loss": 2.232323408126831,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 0.6580882352941176,
|
|
"grad_norm": 18.1786080559346,
|
|
"learning_rate": 9.574332325084564e-06,
|
|
"loss": 2.4674391746520996,
|
|
"step": 1253
|
|
},
|
|
{
|
|
"epoch": 0.6586134453781513,
|
|
"grad_norm": 8.37795044251335,
|
|
"learning_rate": 9.573097581043861e-06,
|
|
"loss": 3.0313472747802734,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 0.6591386554621849,
|
|
"grad_norm": 7.786784506589957,
|
|
"learning_rate": 9.571861128625191e-06,
|
|
"loss": 2.654280185699463,
|
|
"step": 1255
|
|
},
|
|
{
|
|
"epoch": 0.6596638655462185,
|
|
"grad_norm": 11.678518611114994,
|
|
"learning_rate": 9.570622968290455e-06,
|
|
"loss": 2.807952880859375,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 0.6601890756302521,
|
|
"grad_norm": 11.8002752101054,
|
|
"learning_rate": 9.569383100502193e-06,
|
|
"loss": 2.6909966468811035,
|
|
"step": 1257
|
|
},
|
|
{
|
|
"epoch": 0.6607142857142857,
|
|
"grad_norm": 19.067675110443254,
|
|
"learning_rate": 9.568141525723582e-06,
|
|
"loss": 2.5166993141174316,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 0.6612394957983193,
|
|
"grad_norm": 18.388895506648996,
|
|
"learning_rate": 9.566898244418443e-06,
|
|
"loss": 3.5859007835388184,
|
|
"step": 1259
|
|
},
|
|
{
|
|
"epoch": 0.6617647058823529,
|
|
"grad_norm": 9.61108395228637,
|
|
"learning_rate": 9.565653257051228e-06,
|
|
"loss": 2.6796000003814697,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.6622899159663865,
|
|
"grad_norm": 7.540243622628439,
|
|
"learning_rate": 9.564406564087032e-06,
|
|
"loss": 2.68310809135437,
|
|
"step": 1261
|
|
},
|
|
{
|
|
"epoch": 0.6628151260504201,
|
|
"grad_norm": 6.658163745066767,
|
|
"learning_rate": 9.563158165991577e-06,
|
|
"loss": 2.541107177734375,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 0.6633403361344538,
|
|
"grad_norm": 13.042698254835107,
|
|
"learning_rate": 9.561908063231234e-06,
|
|
"loss": 2.2719717025756836,
|
|
"step": 1263
|
|
},
|
|
{
|
|
"epoch": 0.6638655462184874,
|
|
"grad_norm": 14.826951460449694,
|
|
"learning_rate": 9.560656256273004e-06,
|
|
"loss": 2.5673341751098633,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 0.664390756302521,
|
|
"grad_norm": 8.500017556503874,
|
|
"learning_rate": 9.559402745584527e-06,
|
|
"loss": 2.7344629764556885,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 0.6649159663865546,
|
|
"grad_norm": 13.155321458061042,
|
|
"learning_rate": 9.558147531634076e-06,
|
|
"loss": 2.5384857654571533,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 0.6654411764705882,
|
|
"grad_norm": 6.01182696383257,
|
|
"learning_rate": 9.556890614890565e-06,
|
|
"loss": 2.630429267883301,
|
|
"step": 1267
|
|
},
|
|
{
|
|
"epoch": 0.6659663865546218,
|
|
"grad_norm": 6.627709500576783,
|
|
"learning_rate": 9.555631995823543e-06,
|
|
"loss": 2.5639748573303223,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 0.6664915966386554,
|
|
"grad_norm": 7.936485985229376,
|
|
"learning_rate": 9.554371674903191e-06,
|
|
"loss": 2.715306282043457,
|
|
"step": 1269
|
|
},
|
|
{
|
|
"epoch": 0.667016806722689,
|
|
"grad_norm": 11.047934970062675,
|
|
"learning_rate": 9.55310965260033e-06,
|
|
"loss": 2.6093361377716064,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.6675420168067226,
|
|
"grad_norm": 9.885619999004115,
|
|
"learning_rate": 9.551845929386417e-06,
|
|
"loss": 2.436250686645508,
|
|
"step": 1271
|
|
},
|
|
{
|
|
"epoch": 0.6680672268907563,
|
|
"grad_norm": 6.637540996061952,
|
|
"learning_rate": 9.55058050573354e-06,
|
|
"loss": 2.684870481491089,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 0.6685924369747899,
|
|
"grad_norm": 11.160500558974611,
|
|
"learning_rate": 9.549313382114427e-06,
|
|
"loss": 2.517242431640625,
|
|
"step": 1273
|
|
},
|
|
{
|
|
"epoch": 0.6691176470588235,
|
|
"grad_norm": 9.121291347461248,
|
|
"learning_rate": 9.548044559002439e-06,
|
|
"loss": 2.7190897464752197,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 0.6696428571428571,
|
|
"grad_norm": 10.318891847989532,
|
|
"learning_rate": 9.54677403687157e-06,
|
|
"loss": 3.291470766067505,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.6701680672268907,
|
|
"grad_norm": 20.547739071752503,
|
|
"learning_rate": 9.545501816196452e-06,
|
|
"loss": 2.768106698989868,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 0.6706932773109243,
|
|
"grad_norm": 6.558821932019025,
|
|
"learning_rate": 9.54422789745235e-06,
|
|
"loss": 2.909510612487793,
|
|
"step": 1277
|
|
},
|
|
{
|
|
"epoch": 0.6712184873949579,
|
|
"grad_norm": 17.38320702308684,
|
|
"learning_rate": 9.542952281115163e-06,
|
|
"loss": 3.3048973083496094,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 0.6717436974789915,
|
|
"grad_norm": 15.238255200305627,
|
|
"learning_rate": 9.541674967661424e-06,
|
|
"loss": 2.5014166831970215,
|
|
"step": 1279
|
|
},
|
|
{
|
|
"epoch": 0.6722689075630253,
|
|
"grad_norm": 12.917675640362509,
|
|
"learning_rate": 9.540395957568303e-06,
|
|
"loss": 2.706674575805664,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.6727941176470589,
|
|
"grad_norm": 11.220456694925208,
|
|
"learning_rate": 9.5391152513136e-06,
|
|
"loss": 3.067174196243286,
|
|
"step": 1281
|
|
},
|
|
{
|
|
"epoch": 0.6733193277310925,
|
|
"grad_norm": 13.304256076713015,
|
|
"learning_rate": 9.53783284937575e-06,
|
|
"loss": 2.654634475708008,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 0.6738445378151261,
|
|
"grad_norm": 20.843161074031297,
|
|
"learning_rate": 9.536548752233822e-06,
|
|
"loss": 3.2768654823303223,
|
|
"step": 1283
|
|
},
|
|
{
|
|
"epoch": 0.6743697478991597,
|
|
"grad_norm": 46.99042684881799,
|
|
"learning_rate": 9.535262960367517e-06,
|
|
"loss": 2.1927356719970703,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 0.6748949579831933,
|
|
"grad_norm": 25.37929267140401,
|
|
"learning_rate": 9.533975474257171e-06,
|
|
"loss": 3.0900778770446777,
|
|
"step": 1285
|
|
},
|
|
{
|
|
"epoch": 0.6754201680672269,
|
|
"grad_norm": 13.050063420656867,
|
|
"learning_rate": 9.53268629438375e-06,
|
|
"loss": 3.2228505611419678,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 0.6759453781512605,
|
|
"grad_norm": 11.1449400416276,
|
|
"learning_rate": 9.531395421228857e-06,
|
|
"loss": 2.9872071743011475,
|
|
"step": 1287
|
|
},
|
|
{
|
|
"epoch": 0.6764705882352942,
|
|
"grad_norm": 9.184043673654573,
|
|
"learning_rate": 9.530102855274723e-06,
|
|
"loss": 2.888235569000244,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 0.6769957983193278,
|
|
"grad_norm": 9.77657945924281,
|
|
"learning_rate": 9.528808597004216e-06,
|
|
"loss": 2.0844593048095703,
|
|
"step": 1289
|
|
},
|
|
{
|
|
"epoch": 0.6775210084033614,
|
|
"grad_norm": 11.846016703107276,
|
|
"learning_rate": 9.527512646900832e-06,
|
|
"loss": 2.4693167209625244,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.678046218487395,
|
|
"grad_norm": 17.739386299001715,
|
|
"learning_rate": 9.5262150054487e-06,
|
|
"loss": 2.933748722076416,
|
|
"step": 1291
|
|
},
|
|
{
|
|
"epoch": 0.6785714285714286,
|
|
"grad_norm": 6.279664041307915,
|
|
"learning_rate": 9.524915673132584e-06,
|
|
"loss": 2.602099895477295,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 0.6790966386554622,
|
|
"grad_norm": 9.629477822347484,
|
|
"learning_rate": 9.523614650437876e-06,
|
|
"loss": 2.6288552284240723,
|
|
"step": 1293
|
|
},
|
|
{
|
|
"epoch": 0.6796218487394958,
|
|
"grad_norm": 14.619570781758624,
|
|
"learning_rate": 9.522311937850599e-06,
|
|
"loss": 2.5347964763641357,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 0.6801470588235294,
|
|
"grad_norm": 16.33077187580003,
|
|
"learning_rate": 9.521007535857412e-06,
|
|
"loss": 2.9023921489715576,
|
|
"step": 1295
|
|
},
|
|
{
|
|
"epoch": 0.680672268907563,
|
|
"grad_norm": 12.231716396344995,
|
|
"learning_rate": 9.5197014449456e-06,
|
|
"loss": 2.02052640914917,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 0.6811974789915967,
|
|
"grad_norm": 11.85750275712929,
|
|
"learning_rate": 9.518393665603084e-06,
|
|
"loss": 2.457960844039917,
|
|
"step": 1297
|
|
},
|
|
{
|
|
"epoch": 0.6817226890756303,
|
|
"grad_norm": 45.000785607816304,
|
|
"learning_rate": 9.517084198318408e-06,
|
|
"loss": 2.775736093521118,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 0.6822478991596639,
|
|
"grad_norm": 11.848366686860444,
|
|
"learning_rate": 9.515773043580754e-06,
|
|
"loss": 2.7515206336975098,
|
|
"step": 1299
|
|
},
|
|
{
|
|
"epoch": 0.6827731092436975,
|
|
"grad_norm": 8.044014029627554,
|
|
"learning_rate": 9.514460201879933e-06,
|
|
"loss": 3.122994899749756,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.6832983193277311,
|
|
"grad_norm": 7.709408715358316,
|
|
"learning_rate": 9.513145673706383e-06,
|
|
"loss": 2.763450860977173,
|
|
"step": 1301
|
|
},
|
|
{
|
|
"epoch": 0.6838235294117647,
|
|
"grad_norm": 16.596621425114005,
|
|
"learning_rate": 9.511829459551177e-06,
|
|
"loss": 1.674150824546814,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 0.6843487394957983,
|
|
"grad_norm": 16.944579906628196,
|
|
"learning_rate": 9.51051155990601e-06,
|
|
"loss": 3.0965023040771484,
|
|
"step": 1303
|
|
},
|
|
{
|
|
"epoch": 0.6848739495798319,
|
|
"grad_norm": 29.684456904202968,
|
|
"learning_rate": 9.509191975263214e-06,
|
|
"loss": 3.5322749614715576,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 0.6853991596638656,
|
|
"grad_norm": 20.17744729818729,
|
|
"learning_rate": 9.507870706115749e-06,
|
|
"loss": 4.938513278961182,
|
|
"step": 1305
|
|
},
|
|
{
|
|
"epoch": 0.6859243697478992,
|
|
"grad_norm": 9.05047495142821,
|
|
"learning_rate": 9.506547752957202e-06,
|
|
"loss": 2.6075875759124756,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 0.6864495798319328,
|
|
"grad_norm": 7.172005664837628,
|
|
"learning_rate": 9.505223116281792e-06,
|
|
"loss": 2.5574395656585693,
|
|
"step": 1307
|
|
},
|
|
{
|
|
"epoch": 0.6869747899159664,
|
|
"grad_norm": 5.460805864901304,
|
|
"learning_rate": 9.503896796584363e-06,
|
|
"loss": 1.2071025371551514,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 0.6875,
|
|
"grad_norm": 11.696722291676378,
|
|
"learning_rate": 9.50256879436039e-06,
|
|
"loss": 2.3375420570373535,
|
|
"step": 1309
|
|
},
|
|
{
|
|
"epoch": 0.6880252100840336,
|
|
"grad_norm": 15.393794232695843,
|
|
"learning_rate": 9.501239110105977e-06,
|
|
"loss": 2.381495237350464,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.6885504201680672,
|
|
"grad_norm": 8.985583895994047,
|
|
"learning_rate": 9.499907744317857e-06,
|
|
"loss": 2.9014463424682617,
|
|
"step": 1311
|
|
},
|
|
{
|
|
"epoch": 0.6890756302521008,
|
|
"grad_norm": 10.51140471946597,
|
|
"learning_rate": 9.49857469749339e-06,
|
|
"loss": 2.4108285903930664,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 0.6896008403361344,
|
|
"grad_norm": 23.733160801090765,
|
|
"learning_rate": 9.497239970130561e-06,
|
|
"loss": 2.544302225112915,
|
|
"step": 1313
|
|
},
|
|
{
|
|
"epoch": 0.6901260504201681,
|
|
"grad_norm": 10.113600080830354,
|
|
"learning_rate": 9.49590356272799e-06,
|
|
"loss": 3.2044105529785156,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 0.6906512605042017,
|
|
"grad_norm": 11.225461610266919,
|
|
"learning_rate": 9.494565475784918e-06,
|
|
"loss": 2.7142252922058105,
|
|
"step": 1315
|
|
},
|
|
{
|
|
"epoch": 0.6911764705882353,
|
|
"grad_norm": 12.805029589555923,
|
|
"learning_rate": 9.493225709801215e-06,
|
|
"loss": 2.8982505798339844,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 0.6917016806722689,
|
|
"grad_norm": 8.345505498879067,
|
|
"learning_rate": 9.491884265277383e-06,
|
|
"loss": 2.6981797218322754,
|
|
"step": 1317
|
|
},
|
|
{
|
|
"epoch": 0.6922268907563025,
|
|
"grad_norm": 12.937650980741134,
|
|
"learning_rate": 9.490541142714542e-06,
|
|
"loss": 2.7911643981933594,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 0.6927521008403361,
|
|
"grad_norm": 22.116933179052825,
|
|
"learning_rate": 9.489196342614447e-06,
|
|
"loss": 2.021183967590332,
|
|
"step": 1319
|
|
},
|
|
{
|
|
"epoch": 0.6932773109243697,
|
|
"grad_norm": 14.357157701259643,
|
|
"learning_rate": 9.487849865479477e-06,
|
|
"loss": 2.7909629344940186,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.6938025210084033,
|
|
"grad_norm": 10.90785986875978,
|
|
"learning_rate": 9.486501711812637e-06,
|
|
"loss": 2.944089889526367,
|
|
"step": 1321
|
|
},
|
|
{
|
|
"epoch": 0.694327731092437,
|
|
"grad_norm": 15.35413406540934,
|
|
"learning_rate": 9.485151882117556e-06,
|
|
"loss": 3.075840950012207,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 0.6948529411764706,
|
|
"grad_norm": 10.921413024324112,
|
|
"learning_rate": 9.483800376898496e-06,
|
|
"loss": 2.9666800498962402,
|
|
"step": 1323
|
|
},
|
|
{
|
|
"epoch": 0.6953781512605042,
|
|
"grad_norm": 6.931937472194988,
|
|
"learning_rate": 9.482447196660338e-06,
|
|
"loss": 2.744246006011963,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 0.6959033613445378,
|
|
"grad_norm": 22.815648595154784,
|
|
"learning_rate": 9.481092341908591e-06,
|
|
"loss": 2.2377512454986572,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.6964285714285714,
|
|
"grad_norm": 33.745252950177466,
|
|
"learning_rate": 9.47973581314939e-06,
|
|
"loss": 3.381443500518799,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 0.696953781512605,
|
|
"grad_norm": 13.747870396917808,
|
|
"learning_rate": 9.478377610889495e-06,
|
|
"loss": 3.339777708053589,
|
|
"step": 1327
|
|
},
|
|
{
|
|
"epoch": 0.6974789915966386,
|
|
"grad_norm": 14.754600586020432,
|
|
"learning_rate": 9.477017735636294e-06,
|
|
"loss": 2.6213040351867676,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 0.6980042016806722,
|
|
"grad_norm": 14.243546829313178,
|
|
"learning_rate": 9.475656187897794e-06,
|
|
"loss": 2.7175559997558594,
|
|
"step": 1329
|
|
},
|
|
{
|
|
"epoch": 0.6985294117647058,
|
|
"grad_norm": 17.71429842481506,
|
|
"learning_rate": 9.47429296818263e-06,
|
|
"loss": 2.6168465614318848,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.6990546218487395,
|
|
"grad_norm": 10.504564201612084,
|
|
"learning_rate": 9.472928077000066e-06,
|
|
"loss": 2.1036758422851562,
|
|
"step": 1331
|
|
},
|
|
{
|
|
"epoch": 0.6995798319327731,
|
|
"grad_norm": 12.309961034642136,
|
|
"learning_rate": 9.47156151485998e-06,
|
|
"loss": 2.7158048152923584,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 0.7001050420168067,
|
|
"grad_norm": 7.9598002812863164,
|
|
"learning_rate": 9.470193282272886e-06,
|
|
"loss": 2.008297920227051,
|
|
"step": 1333
|
|
},
|
|
{
|
|
"epoch": 0.7006302521008403,
|
|
"grad_norm": 12.860207803140918,
|
|
"learning_rate": 9.468823379749915e-06,
|
|
"loss": 2.714076519012451,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 0.7011554621848739,
|
|
"grad_norm": 17.560070558918074,
|
|
"learning_rate": 9.467451807802821e-06,
|
|
"loss": 2.544034481048584,
|
|
"step": 1335
|
|
},
|
|
{
|
|
"epoch": 0.7016806722689075,
|
|
"grad_norm": 17.977807037833866,
|
|
"learning_rate": 9.466078566943985e-06,
|
|
"loss": 2.744539737701416,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 0.7022058823529411,
|
|
"grad_norm": 18.045075243022165,
|
|
"learning_rate": 9.464703657686412e-06,
|
|
"loss": 2.770423412322998,
|
|
"step": 1337
|
|
},
|
|
{
|
|
"epoch": 0.7027310924369747,
|
|
"grad_norm": 8.796807059905332,
|
|
"learning_rate": 9.463327080543726e-06,
|
|
"loss": 2.601559638977051,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 0.7032563025210085,
|
|
"grad_norm": 9.770323235779015,
|
|
"learning_rate": 9.461948836030179e-06,
|
|
"loss": 3.0226902961730957,
|
|
"step": 1339
|
|
},
|
|
{
|
|
"epoch": 0.7037815126050421,
|
|
"grad_norm": 9.670867365270533,
|
|
"learning_rate": 9.460568924660642e-06,
|
|
"loss": 2.2374281883239746,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.7043067226890757,
|
|
"grad_norm": 11.038223135908881,
|
|
"learning_rate": 9.45918734695061e-06,
|
|
"loss": 2.7864830493927,
|
|
"step": 1341
|
|
},
|
|
{
|
|
"epoch": 0.7048319327731093,
|
|
"grad_norm": 16.745596707482306,
|
|
"learning_rate": 9.457804103416201e-06,
|
|
"loss": 2.5133237838745117,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 0.7053571428571429,
|
|
"grad_norm": 11.036184780862653,
|
|
"learning_rate": 9.456419194574158e-06,
|
|
"loss": 2.864564895629883,
|
|
"step": 1343
|
|
},
|
|
{
|
|
"epoch": 0.7058823529411765,
|
|
"grad_norm": 19.094604602634462,
|
|
"learning_rate": 9.45503262094184e-06,
|
|
"loss": 3.54667067527771,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 0.7064075630252101,
|
|
"grad_norm": 12.94870110586718,
|
|
"learning_rate": 9.453644383037232e-06,
|
|
"loss": 2.3254120349884033,
|
|
"step": 1345
|
|
},
|
|
{
|
|
"epoch": 0.7069327731092437,
|
|
"grad_norm": 14.861646673492746,
|
|
"learning_rate": 9.45225448137894e-06,
|
|
"loss": 2.5209128856658936,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 0.7074579831932774,
|
|
"grad_norm": 13.398996249782893,
|
|
"learning_rate": 9.45086291648619e-06,
|
|
"loss": 2.752837657928467,
|
|
"step": 1347
|
|
},
|
|
{
|
|
"epoch": 0.707983193277311,
|
|
"grad_norm": 11.404093547038189,
|
|
"learning_rate": 9.449469688878833e-06,
|
|
"loss": 2.8514773845672607,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 0.7085084033613446,
|
|
"grad_norm": 7.661170507555201,
|
|
"learning_rate": 9.448074799077337e-06,
|
|
"loss": 2.347379684448242,
|
|
"step": 1349
|
|
},
|
|
{
|
|
"epoch": 0.7090336134453782,
|
|
"grad_norm": 12.638915089631368,
|
|
"learning_rate": 9.446678247602794e-06,
|
|
"loss": 2.823115348815918,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.7095588235294118,
|
|
"grad_norm": 18.12805299575099,
|
|
"learning_rate": 9.445280034976916e-06,
|
|
"loss": 2.608738422393799,
|
|
"step": 1351
|
|
},
|
|
{
|
|
"epoch": 0.7100840336134454,
|
|
"grad_norm": 8.642232967922208,
|
|
"learning_rate": 9.443880161722032e-06,
|
|
"loss": 3.023998737335205,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 0.710609243697479,
|
|
"grad_norm": 19.50477675862717,
|
|
"learning_rate": 9.442478628361098e-06,
|
|
"loss": 2.913578748703003,
|
|
"step": 1353
|
|
},
|
|
{
|
|
"epoch": 0.7111344537815126,
|
|
"grad_norm": 21.706233749621184,
|
|
"learning_rate": 9.441075435417683e-06,
|
|
"loss": 3.0246872901916504,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 0.7116596638655462,
|
|
"grad_norm": 17.67378038551585,
|
|
"learning_rate": 9.439670583415984e-06,
|
|
"loss": 3.0222532749176025,
|
|
"step": 1355
|
|
},
|
|
{
|
|
"epoch": 0.7121848739495799,
|
|
"grad_norm": 8.837601410314631,
|
|
"learning_rate": 9.438264072880811e-06,
|
|
"loss": 2.9659714698791504,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 0.7127100840336135,
|
|
"grad_norm": 15.012524158066952,
|
|
"learning_rate": 9.436855904337596e-06,
|
|
"loss": 2.4705705642700195,
|
|
"step": 1357
|
|
},
|
|
{
|
|
"epoch": 0.7132352941176471,
|
|
"grad_norm": 27.40170647298477,
|
|
"learning_rate": 9.435446078312389e-06,
|
|
"loss": 2.984248161315918,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 0.7137605042016807,
|
|
"grad_norm": 13.497646711735237,
|
|
"learning_rate": 9.434034595331863e-06,
|
|
"loss": 3.160773515701294,
|
|
"step": 1359
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 9.35232386887726,
|
|
"learning_rate": 9.432621455923308e-06,
|
|
"loss": 2.279153347015381,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.7148109243697479,
|
|
"grad_norm": 12.189984058563963,
|
|
"learning_rate": 9.43120666061463e-06,
|
|
"loss": 2.089343547821045,
|
|
"step": 1361
|
|
},
|
|
{
|
|
"epoch": 0.7153361344537815,
|
|
"grad_norm": 5.050789962910078,
|
|
"learning_rate": 9.429790209934355e-06,
|
|
"loss": 2.4052462577819824,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 0.7158613445378151,
|
|
"grad_norm": 8.971997861991584,
|
|
"learning_rate": 9.428372104411632e-06,
|
|
"loss": 2.5842559337615967,
|
|
"step": 1363
|
|
},
|
|
{
|
|
"epoch": 0.7163865546218487,
|
|
"grad_norm": 12.103027223944814,
|
|
"learning_rate": 9.426952344576222e-06,
|
|
"loss": 2.938389301300049,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 0.7169117647058824,
|
|
"grad_norm": 11.86887475831686,
|
|
"learning_rate": 9.425530930958507e-06,
|
|
"loss": 3.1199917793273926,
|
|
"step": 1365
|
|
},
|
|
{
|
|
"epoch": 0.717436974789916,
|
|
"grad_norm": 6.708273631905648,
|
|
"learning_rate": 9.424107864089485e-06,
|
|
"loss": 2.9766764640808105,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 0.7179621848739496,
|
|
"grad_norm": 8.621264404336527,
|
|
"learning_rate": 9.422683144500775e-06,
|
|
"loss": 2.8513436317443848,
|
|
"step": 1367
|
|
},
|
|
{
|
|
"epoch": 0.7184873949579832,
|
|
"grad_norm": 15.458155449734308,
|
|
"learning_rate": 9.421256772724612e-06,
|
|
"loss": 3.151705503463745,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 0.7190126050420168,
|
|
"grad_norm": 14.290268675960716,
|
|
"learning_rate": 9.419828749293845e-06,
|
|
"loss": 2.5013177394866943,
|
|
"step": 1369
|
|
},
|
|
{
|
|
"epoch": 0.7195378151260504,
|
|
"grad_norm": 18.682865499597426,
|
|
"learning_rate": 9.418399074741943e-06,
|
|
"loss": 2.460247755050659,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.720063025210084,
|
|
"grad_norm": 24.451169695375615,
|
|
"learning_rate": 9.416967749602996e-06,
|
|
"loss": 2.5819268226623535,
|
|
"step": 1371
|
|
},
|
|
{
|
|
"epoch": 0.7205882352941176,
|
|
"grad_norm": 10.02373184540919,
|
|
"learning_rate": 9.4155347744117e-06,
|
|
"loss": 2.1001505851745605,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 0.7211134453781513,
|
|
"grad_norm": 8.777736770119411,
|
|
"learning_rate": 9.414100149703373e-06,
|
|
"loss": 2.957872152328491,
|
|
"step": 1373
|
|
},
|
|
{
|
|
"epoch": 0.7216386554621849,
|
|
"grad_norm": 8.456445117374907,
|
|
"learning_rate": 9.412663876013954e-06,
|
|
"loss": 2.311070442199707,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 0.7221638655462185,
|
|
"grad_norm": 11.263860119699258,
|
|
"learning_rate": 9.411225953879993e-06,
|
|
"loss": 2.642958641052246,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.7226890756302521,
|
|
"grad_norm": 12.662610939522205,
|
|
"learning_rate": 9.409786383838653e-06,
|
|
"loss": 3.2040657997131348,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 0.7232142857142857,
|
|
"grad_norm": 9.783216528684383,
|
|
"learning_rate": 9.40834516642772e-06,
|
|
"loss": 2.6354026794433594,
|
|
"step": 1377
|
|
},
|
|
{
|
|
"epoch": 0.7237394957983193,
|
|
"grad_norm": 13.818754228405075,
|
|
"learning_rate": 9.406902302185587e-06,
|
|
"loss": 2.6107289791107178,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 0.7242647058823529,
|
|
"grad_norm": 15.054292525217258,
|
|
"learning_rate": 9.405457791651272e-06,
|
|
"loss": 2.8895740509033203,
|
|
"step": 1379
|
|
},
|
|
{
|
|
"epoch": 0.7247899159663865,
|
|
"grad_norm": 11.901605027729738,
|
|
"learning_rate": 9.4040116353644e-06,
|
|
"loss": 2.9344370365142822,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.7253151260504201,
|
|
"grad_norm": 10.969129039598197,
|
|
"learning_rate": 9.402563833865213e-06,
|
|
"loss": 2.6658058166503906,
|
|
"step": 1381
|
|
},
|
|
{
|
|
"epoch": 0.7258403361344538,
|
|
"grad_norm": 12.989795592487805,
|
|
"learning_rate": 9.401114387694568e-06,
|
|
"loss": 3.1829166412353516,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 0.7263655462184874,
|
|
"grad_norm": 16.549151471583183,
|
|
"learning_rate": 9.399663297393937e-06,
|
|
"loss": 3.0087943077087402,
|
|
"step": 1383
|
|
},
|
|
{
|
|
"epoch": 0.726890756302521,
|
|
"grad_norm": 10.953146233522348,
|
|
"learning_rate": 9.398210563505405e-06,
|
|
"loss": 2.890185832977295,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 0.7274159663865546,
|
|
"grad_norm": 33.01528017378158,
|
|
"learning_rate": 9.396756186571672e-06,
|
|
"loss": 1.9646806716918945,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 0.7279411764705882,
|
|
"grad_norm": 11.884339298915373,
|
|
"learning_rate": 9.395300167136055e-06,
|
|
"loss": 2.821507453918457,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 0.7284663865546218,
|
|
"grad_norm": 32.47387416330914,
|
|
"learning_rate": 9.393842505742474e-06,
|
|
"loss": 2.535316228866577,
|
|
"step": 1387
|
|
},
|
|
{
|
|
"epoch": 0.7289915966386554,
|
|
"grad_norm": 9.075975777726198,
|
|
"learning_rate": 9.392383202935476e-06,
|
|
"loss": 2.6755433082580566,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 0.729516806722689,
|
|
"grad_norm": 12.860652796524104,
|
|
"learning_rate": 9.39092225926021e-06,
|
|
"loss": 2.2261745929718018,
|
|
"step": 1389
|
|
},
|
|
{
|
|
"epoch": 0.7300420168067226,
|
|
"grad_norm": 8.64320308642805,
|
|
"learning_rate": 9.389459675262446e-06,
|
|
"loss": 2.6248598098754883,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.7305672268907563,
|
|
"grad_norm": 10.380043046434807,
|
|
"learning_rate": 9.387995451488561e-06,
|
|
"loss": 3.0444443225860596,
|
|
"step": 1391
|
|
},
|
|
{
|
|
"epoch": 0.7310924369747899,
|
|
"grad_norm": 7.138907418937429,
|
|
"learning_rate": 9.386529588485549e-06,
|
|
"loss": 3.195737361907959,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 0.7316176470588235,
|
|
"grad_norm": 28.015383111308655,
|
|
"learning_rate": 9.385062086801013e-06,
|
|
"loss": 2.8851213455200195,
|
|
"step": 1393
|
|
},
|
|
{
|
|
"epoch": 0.7321428571428571,
|
|
"grad_norm": 10.214467168943983,
|
|
"learning_rate": 9.38359294698317e-06,
|
|
"loss": 2.9917964935302734,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 0.7326680672268907,
|
|
"grad_norm": 17.77067206107327,
|
|
"learning_rate": 9.382122169580848e-06,
|
|
"loss": 2.2778615951538086,
|
|
"step": 1395
|
|
},
|
|
{
|
|
"epoch": 0.7331932773109243,
|
|
"grad_norm": 7.571215813251031,
|
|
"learning_rate": 9.380649755143488e-06,
|
|
"loss": 2.527552366256714,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 0.7337184873949579,
|
|
"grad_norm": 7.717230317008868,
|
|
"learning_rate": 9.379175704221139e-06,
|
|
"loss": 1.7584331035614014,
|
|
"step": 1397
|
|
},
|
|
{
|
|
"epoch": 0.7342436974789915,
|
|
"grad_norm": 11.353245551534629,
|
|
"learning_rate": 9.377700017364469e-06,
|
|
"loss": 1.99480402469635,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 0.7347689075630253,
|
|
"grad_norm": 8.640384284021444,
|
|
"learning_rate": 9.376222695124748e-06,
|
|
"loss": 2.5055298805236816,
|
|
"step": 1399
|
|
},
|
|
{
|
|
"epoch": 0.7352941176470589,
|
|
"grad_norm": 13.5071496145152,
|
|
"learning_rate": 9.374743738053862e-06,
|
|
"loss": 2.9797515869140625,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.7358193277310925,
|
|
"grad_norm": 15.081587837232423,
|
|
"learning_rate": 9.37326314670431e-06,
|
|
"loss": 2.2903127670288086,
|
|
"step": 1401
|
|
},
|
|
{
|
|
"epoch": 0.7363445378151261,
|
|
"grad_norm": 10.918947750645197,
|
|
"learning_rate": 9.371780921629195e-06,
|
|
"loss": 2.863656520843506,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 0.7368697478991597,
|
|
"grad_norm": 10.670799315171976,
|
|
"learning_rate": 9.370297063382235e-06,
|
|
"loss": 2.4964540004730225,
|
|
"step": 1403
|
|
},
|
|
{
|
|
"epoch": 0.7373949579831933,
|
|
"grad_norm": 9.520859840288391,
|
|
"learning_rate": 9.368811572517756e-06,
|
|
"loss": 2.362823724746704,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 0.7379201680672269,
|
|
"grad_norm": 6.835728974180949,
|
|
"learning_rate": 9.367324449590694e-06,
|
|
"loss": 3.0204315185546875,
|
|
"step": 1405
|
|
},
|
|
{
|
|
"epoch": 0.7384453781512605,
|
|
"grad_norm": 14.863406548725411,
|
|
"learning_rate": 9.3658356951566e-06,
|
|
"loss": 2.7434098720550537,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 0.7389705882352942,
|
|
"grad_norm": 12.858452907659908,
|
|
"learning_rate": 9.364345309771624e-06,
|
|
"loss": 2.655700445175171,
|
|
"step": 1407
|
|
},
|
|
{
|
|
"epoch": 0.7394957983193278,
|
|
"grad_norm": 14.65548674711664,
|
|
"learning_rate": 9.362853293992535e-06,
|
|
"loss": 2.302583694458008,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 0.7400210084033614,
|
|
"grad_norm": 8.260087260299908,
|
|
"learning_rate": 9.361359648376707e-06,
|
|
"loss": 3.3217902183532715,
|
|
"step": 1409
|
|
},
|
|
{
|
|
"epoch": 0.740546218487395,
|
|
"grad_norm": 15.076035124946063,
|
|
"learning_rate": 9.359864373482122e-06,
|
|
"loss": 3.1235344409942627,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.7410714285714286,
|
|
"grad_norm": 10.722930307754114,
|
|
"learning_rate": 9.358367469867372e-06,
|
|
"loss": 2.8075900077819824,
|
|
"step": 1411
|
|
},
|
|
{
|
|
"epoch": 0.7415966386554622,
|
|
"grad_norm": 12.983006343518566,
|
|
"learning_rate": 9.356868938091655e-06,
|
|
"loss": 2.9995510578155518,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 0.7421218487394958,
|
|
"grad_norm": 9.379237597493933,
|
|
"learning_rate": 9.355368778714784e-06,
|
|
"loss": 2.6002068519592285,
|
|
"step": 1413
|
|
},
|
|
{
|
|
"epoch": 0.7426470588235294,
|
|
"grad_norm": 26.27711212533193,
|
|
"learning_rate": 9.353866992297174e-06,
|
|
"loss": 2.375796318054199,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 0.743172268907563,
|
|
"grad_norm": 11.222766076275274,
|
|
"learning_rate": 9.352363579399846e-06,
|
|
"loss": 2.8118574619293213,
|
|
"step": 1415
|
|
},
|
|
{
|
|
"epoch": 0.7436974789915967,
|
|
"grad_norm": 13.51524891050891,
|
|
"learning_rate": 9.350858540584437e-06,
|
|
"loss": 3.0930118560791016,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 0.7442226890756303,
|
|
"grad_norm": 13.005699518462123,
|
|
"learning_rate": 9.349351876413181e-06,
|
|
"loss": 3.2080953121185303,
|
|
"step": 1417
|
|
},
|
|
{
|
|
"epoch": 0.7447478991596639,
|
|
"grad_norm": 9.71357191671031,
|
|
"learning_rate": 9.347843587448931e-06,
|
|
"loss": 2.5146827697753906,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 0.7452731092436975,
|
|
"grad_norm": 8.728947305262524,
|
|
"learning_rate": 9.346333674255132e-06,
|
|
"loss": 2.7985026836395264,
|
|
"step": 1419
|
|
},
|
|
{
|
|
"epoch": 0.7457983193277311,
|
|
"grad_norm": 12.299315161082067,
|
|
"learning_rate": 9.344822137395853e-06,
|
|
"loss": 2.5758912563323975,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.7463235294117647,
|
|
"grad_norm": 17.672666297977287,
|
|
"learning_rate": 9.343308977435754e-06,
|
|
"loss": 2.5825915336608887,
|
|
"step": 1421
|
|
},
|
|
{
|
|
"epoch": 0.7468487394957983,
|
|
"grad_norm": 12.718365767124839,
|
|
"learning_rate": 9.34179419494011e-06,
|
|
"loss": 2.511111259460449,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 0.7473739495798319,
|
|
"grad_norm": 14.565970508358097,
|
|
"learning_rate": 9.340277790474804e-06,
|
|
"loss": 2.6805567741394043,
|
|
"step": 1423
|
|
},
|
|
{
|
|
"epoch": 0.7478991596638656,
|
|
"grad_norm": 11.315382129257491,
|
|
"learning_rate": 9.338759764606318e-06,
|
|
"loss": 2.303293228149414,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 0.7484243697478992,
|
|
"grad_norm": 8.386154954984145,
|
|
"learning_rate": 9.337240117901742e-06,
|
|
"loss": 2.525210380554199,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.7489495798319328,
|
|
"grad_norm": 15.996249867978909,
|
|
"learning_rate": 9.335718850928772e-06,
|
|
"loss": 2.724491834640503,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 0.7494747899159664,
|
|
"grad_norm": 13.992497994363099,
|
|
"learning_rate": 9.334195964255713e-06,
|
|
"loss": 2.6540184020996094,
|
|
"step": 1427
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"grad_norm": 9.7380823568118,
|
|
"learning_rate": 9.33267145845147e-06,
|
|
"loss": 3.3706037998199463,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 0.7505252100840336,
|
|
"grad_norm": 12.893009723038702,
|
|
"learning_rate": 9.331145334085554e-06,
|
|
"loss": 3.2180933952331543,
|
|
"step": 1429
|
|
},
|
|
{
|
|
"epoch": 0.7510504201680672,
|
|
"grad_norm": 10.33314172045796,
|
|
"learning_rate": 9.32961759172808e-06,
|
|
"loss": 2.725083827972412,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.7515756302521008,
|
|
"grad_norm": 12.770106629086934,
|
|
"learning_rate": 9.328088231949773e-06,
|
|
"loss": 2.0833425521850586,
|
|
"step": 1431
|
|
},
|
|
{
|
|
"epoch": 0.7521008403361344,
|
|
"grad_norm": 10.541104698964073,
|
|
"learning_rate": 9.326557255321955e-06,
|
|
"loss": 2.642286539077759,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 0.7526260504201681,
|
|
"grad_norm": 10.584788013068913,
|
|
"learning_rate": 9.325024662416553e-06,
|
|
"loss": 3.445159435272217,
|
|
"step": 1433
|
|
},
|
|
{
|
|
"epoch": 0.7531512605042017,
|
|
"grad_norm": 11.674629230911592,
|
|
"learning_rate": 9.323490453806105e-06,
|
|
"loss": 3.027078628540039,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 0.7536764705882353,
|
|
"grad_norm": 13.392299254395775,
|
|
"learning_rate": 9.321954630063742e-06,
|
|
"loss": 3.040900468826294,
|
|
"step": 1435
|
|
},
|
|
{
|
|
"epoch": 0.7542016806722689,
|
|
"grad_norm": 8.216103167302693,
|
|
"learning_rate": 9.320417191763207e-06,
|
|
"loss": 2.767831563949585,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 0.7547268907563025,
|
|
"grad_norm": 13.229314420547931,
|
|
"learning_rate": 9.318878139478842e-06,
|
|
"loss": 2.596747398376465,
|
|
"step": 1437
|
|
},
|
|
{
|
|
"epoch": 0.7552521008403361,
|
|
"grad_norm": 10.64212348650333,
|
|
"learning_rate": 9.31733747378559e-06,
|
|
"loss": 3.714005947113037,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 0.7557773109243697,
|
|
"grad_norm": 15.23119062631491,
|
|
"learning_rate": 9.315795195259003e-06,
|
|
"loss": 3.0364737510681152,
|
|
"step": 1439
|
|
},
|
|
{
|
|
"epoch": 0.7563025210084033,
|
|
"grad_norm": 17.52760220774232,
|
|
"learning_rate": 9.314251304475233e-06,
|
|
"loss": 3.9018704891204834,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.756827731092437,
|
|
"grad_norm": 14.535598647006452,
|
|
"learning_rate": 9.312705802011029e-06,
|
|
"loss": 2.916362762451172,
|
|
"step": 1441
|
|
},
|
|
{
|
|
"epoch": 0.7573529411764706,
|
|
"grad_norm": 15.613857086491242,
|
|
"learning_rate": 9.31115868844375e-06,
|
|
"loss": 2.829233169555664,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 0.7578781512605042,
|
|
"grad_norm": 18.91393207585664,
|
|
"learning_rate": 9.30960996435135e-06,
|
|
"loss": 3.149411678314209,
|
|
"step": 1443
|
|
},
|
|
{
|
|
"epoch": 0.7584033613445378,
|
|
"grad_norm": 20.954511727143665,
|
|
"learning_rate": 9.308059630312391e-06,
|
|
"loss": 2.4108259677886963,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 0.7589285714285714,
|
|
"grad_norm": 13.117738732374297,
|
|
"learning_rate": 9.306507686906033e-06,
|
|
"loss": 3.152261257171631,
|
|
"step": 1445
|
|
},
|
|
{
|
|
"epoch": 0.759453781512605,
|
|
"grad_norm": 11.940719191817243,
|
|
"learning_rate": 9.304954134712034e-06,
|
|
"loss": 2.6755151748657227,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 0.7599789915966386,
|
|
"grad_norm": 6.49025082292054,
|
|
"learning_rate": 9.30339897431076e-06,
|
|
"loss": 2.705385446548462,
|
|
"step": 1447
|
|
},
|
|
{
|
|
"epoch": 0.7605042016806722,
|
|
"grad_norm": 12.99172427229987,
|
|
"learning_rate": 9.301842206283173e-06,
|
|
"loss": 2.8926711082458496,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 0.7610294117647058,
|
|
"grad_norm": 12.412417581013589,
|
|
"learning_rate": 9.300283831210838e-06,
|
|
"loss": 2.6391053199768066,
|
|
"step": 1449
|
|
},
|
|
{
|
|
"epoch": 0.7615546218487395,
|
|
"grad_norm": 5.9448044218751,
|
|
"learning_rate": 9.298723849675916e-06,
|
|
"loss": 2.8540122509002686,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.7620798319327731,
|
|
"grad_norm": 10.00337964899393,
|
|
"learning_rate": 9.297162262261174e-06,
|
|
"loss": 2.409593343734741,
|
|
"step": 1451
|
|
},
|
|
{
|
|
"epoch": 0.7626050420168067,
|
|
"grad_norm": 11.179943644593255,
|
|
"learning_rate": 9.295599069549977e-06,
|
|
"loss": 2.9136595726013184,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 0.7631302521008403,
|
|
"grad_norm": 12.333575441933972,
|
|
"learning_rate": 9.294034272126286e-06,
|
|
"loss": 3.040872097015381,
|
|
"step": 1453
|
|
},
|
|
{
|
|
"epoch": 0.7636554621848739,
|
|
"grad_norm": 7.27507806736491,
|
|
"learning_rate": 9.292467870574669e-06,
|
|
"loss": 2.6332709789276123,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 0.7641806722689075,
|
|
"grad_norm": 6.542470785682123,
|
|
"learning_rate": 9.290899865480283e-06,
|
|
"loss": 3.02650785446167,
|
|
"step": 1455
|
|
},
|
|
{
|
|
"epoch": 0.7647058823529411,
|
|
"grad_norm": 9.093817469440369,
|
|
"learning_rate": 9.289330257428894e-06,
|
|
"loss": 2.564438819885254,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 0.7652310924369747,
|
|
"grad_norm": 13.11724678279817,
|
|
"learning_rate": 9.28775904700686e-06,
|
|
"loss": 2.877781629562378,
|
|
"step": 1457
|
|
},
|
|
{
|
|
"epoch": 0.7657563025210085,
|
|
"grad_norm": 7.9570471475054845,
|
|
"learning_rate": 9.286186234801142e-06,
|
|
"loss": 3.0255775451660156,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 0.7662815126050421,
|
|
"grad_norm": 12.617675806442744,
|
|
"learning_rate": 9.284611821399295e-06,
|
|
"loss": 3.3234949111938477,
|
|
"step": 1459
|
|
},
|
|
{
|
|
"epoch": 0.7668067226890757,
|
|
"grad_norm": 7.22554471917852,
|
|
"learning_rate": 9.283035807389477e-06,
|
|
"loss": 2.3798437118530273,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.7673319327731093,
|
|
"grad_norm": 10.132569658780335,
|
|
"learning_rate": 9.281458193360442e-06,
|
|
"loss": 2.7522311210632324,
|
|
"step": 1461
|
|
},
|
|
{
|
|
"epoch": 0.7678571428571429,
|
|
"grad_norm": 12.685584791423695,
|
|
"learning_rate": 9.279878979901538e-06,
|
|
"loss": 2.3602542877197266,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 0.7683823529411765,
|
|
"grad_norm": 21.579192126219223,
|
|
"learning_rate": 9.278298167602716e-06,
|
|
"loss": 3.0811705589294434,
|
|
"step": 1463
|
|
},
|
|
{
|
|
"epoch": 0.7689075630252101,
|
|
"grad_norm": 16.272382663327445,
|
|
"learning_rate": 9.276715757054523e-06,
|
|
"loss": 2.832975387573242,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 0.7694327731092437,
|
|
"grad_norm": 11.503856614346121,
|
|
"learning_rate": 9.2751317488481e-06,
|
|
"loss": 2.9317173957824707,
|
|
"step": 1465
|
|
},
|
|
{
|
|
"epoch": 0.7699579831932774,
|
|
"grad_norm": 11.352763244351618,
|
|
"learning_rate": 9.27354614357519e-06,
|
|
"loss": 2.6808085441589355,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 0.770483193277311,
|
|
"grad_norm": 13.73363768149438,
|
|
"learning_rate": 9.271958941828125e-06,
|
|
"loss": 3.0309529304504395,
|
|
"step": 1467
|
|
},
|
|
{
|
|
"epoch": 0.7710084033613446,
|
|
"grad_norm": 11.346693259463262,
|
|
"learning_rate": 9.270370144199843e-06,
|
|
"loss": 3.280457019805908,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 0.7715336134453782,
|
|
"grad_norm": 9.832419580041483,
|
|
"learning_rate": 9.26877975128387e-06,
|
|
"loss": 2.869136333465576,
|
|
"step": 1469
|
|
},
|
|
{
|
|
"epoch": 0.7720588235294118,
|
|
"grad_norm": 8.297634210710237,
|
|
"learning_rate": 9.267187763674332e-06,
|
|
"loss": 2.3520267009735107,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.7725840336134454,
|
|
"grad_norm": 9.492541847936554,
|
|
"learning_rate": 9.26559418196595e-06,
|
|
"loss": 2.6950676441192627,
|
|
"step": 1471
|
|
},
|
|
{
|
|
"epoch": 0.773109243697479,
|
|
"grad_norm": 7.0219316314305305,
|
|
"learning_rate": 9.263999006754041e-06,
|
|
"loss": 2.259110927581787,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 0.7736344537815126,
|
|
"grad_norm": 17.116096402526342,
|
|
"learning_rate": 9.262402238634514e-06,
|
|
"loss": 3.1345925331115723,
|
|
"step": 1473
|
|
},
|
|
{
|
|
"epoch": 0.7741596638655462,
|
|
"grad_norm": 13.56774431196218,
|
|
"learning_rate": 9.26080387820388e-06,
|
|
"loss": 2.7827768325805664,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 0.7746848739495799,
|
|
"grad_norm": 14.308155016498903,
|
|
"learning_rate": 9.259203926059237e-06,
|
|
"loss": 2.0597972869873047,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.7752100840336135,
|
|
"grad_norm": 8.428954987828398,
|
|
"learning_rate": 9.257602382798283e-06,
|
|
"loss": 3.4040629863739014,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 0.7757352941176471,
|
|
"grad_norm": 28.692532317833248,
|
|
"learning_rate": 9.255999249019307e-06,
|
|
"loss": 2.8586981296539307,
|
|
"step": 1477
|
|
},
|
|
{
|
|
"epoch": 0.7762605042016807,
|
|
"grad_norm": 8.195887413422152,
|
|
"learning_rate": 9.254394525321195e-06,
|
|
"loss": 3.0691356658935547,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 0.7767857142857143,
|
|
"grad_norm": 9.057303498856644,
|
|
"learning_rate": 9.252788212303426e-06,
|
|
"loss": 2.7064573764801025,
|
|
"step": 1479
|
|
},
|
|
{
|
|
"epoch": 0.7773109243697479,
|
|
"grad_norm": 7.786703582739036,
|
|
"learning_rate": 9.251180310566073e-06,
|
|
"loss": 2.972693681716919,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.7778361344537815,
|
|
"grad_norm": 12.046110411363257,
|
|
"learning_rate": 9.2495708207098e-06,
|
|
"loss": 3.323434591293335,
|
|
"step": 1481
|
|
},
|
|
{
|
|
"epoch": 0.7783613445378151,
|
|
"grad_norm": 13.599940686488399,
|
|
"learning_rate": 9.247959743335865e-06,
|
|
"loss": 2.790578842163086,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 0.7788865546218487,
|
|
"grad_norm": 20.60611191528932,
|
|
"learning_rate": 9.246347079046124e-06,
|
|
"loss": 2.6960320472717285,
|
|
"step": 1483
|
|
},
|
|
{
|
|
"epoch": 0.7794117647058824,
|
|
"grad_norm": 10.075570960784976,
|
|
"learning_rate": 9.244732828443021e-06,
|
|
"loss": 2.6370906829833984,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 0.779936974789916,
|
|
"grad_norm": 18.088967248656378,
|
|
"learning_rate": 9.243116992129593e-06,
|
|
"loss": 3.318084716796875,
|
|
"step": 1485
|
|
},
|
|
{
|
|
"epoch": 0.7804621848739496,
|
|
"grad_norm": 10.461301597730223,
|
|
"learning_rate": 9.241499570709468e-06,
|
|
"loss": 3.348841428756714,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 0.7809873949579832,
|
|
"grad_norm": 10.365807099386126,
|
|
"learning_rate": 9.239880564786871e-06,
|
|
"loss": 2.163569211959839,
|
|
"step": 1487
|
|
},
|
|
{
|
|
"epoch": 0.7815126050420168,
|
|
"grad_norm": 8.677495079018977,
|
|
"learning_rate": 9.238259974966617e-06,
|
|
"loss": 1.077256441116333,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 0.7820378151260504,
|
|
"grad_norm": 6.116014149170227,
|
|
"learning_rate": 9.23663780185411e-06,
|
|
"loss": 2.9423060417175293,
|
|
"step": 1489
|
|
},
|
|
{
|
|
"epoch": 0.782563025210084,
|
|
"grad_norm": 12.069884185295985,
|
|
"learning_rate": 9.235014046055347e-06,
|
|
"loss": 2.5108654499053955,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.7830882352941176,
|
|
"grad_norm": 18.854346897658274,
|
|
"learning_rate": 9.233388708176918e-06,
|
|
"loss": 2.888367176055908,
|
|
"step": 1491
|
|
},
|
|
{
|
|
"epoch": 0.7836134453781513,
|
|
"grad_norm": 10.104713621849985,
|
|
"learning_rate": 9.231761788826e-06,
|
|
"loss": 2.9687886238098145,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 0.7841386554621849,
|
|
"grad_norm": 15.259579316561895,
|
|
"learning_rate": 9.230133288610366e-06,
|
|
"loss": 2.781655788421631,
|
|
"step": 1493
|
|
},
|
|
{
|
|
"epoch": 0.7846638655462185,
|
|
"grad_norm": 9.22616047144127,
|
|
"learning_rate": 9.228503208138377e-06,
|
|
"loss": 2.727569818496704,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 0.7851890756302521,
|
|
"grad_norm": 12.794092005035468,
|
|
"learning_rate": 9.226871548018982e-06,
|
|
"loss": 2.737361431121826,
|
|
"step": 1495
|
|
},
|
|
{
|
|
"epoch": 0.7857142857142857,
|
|
"grad_norm": 14.958976430762513,
|
|
"learning_rate": 9.225238308861723e-06,
|
|
"loss": 3.536166191101074,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 0.7862394957983193,
|
|
"grad_norm": 14.319205644234918,
|
|
"learning_rate": 9.223603491276733e-06,
|
|
"loss": 3.352311611175537,
|
|
"step": 1497
|
|
},
|
|
{
|
|
"epoch": 0.7867647058823529,
|
|
"grad_norm": 8.920169385437928,
|
|
"learning_rate": 9.221967095874733e-06,
|
|
"loss": 2.385847806930542,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 0.7872899159663865,
|
|
"grad_norm": 7.281576255543728,
|
|
"learning_rate": 9.220329123267031e-06,
|
|
"loss": 2.830510377883911,
|
|
"step": 1499
|
|
},
|
|
{
|
|
"epoch": 0.7878151260504201,
|
|
"grad_norm": 10.422748287727703,
|
|
"learning_rate": 9.218689574065526e-06,
|
|
"loss": 2.962968111038208,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.7883403361344538,
|
|
"grad_norm": 9.245955210912829,
|
|
"learning_rate": 9.217048448882711e-06,
|
|
"loss": 2.6180567741394043,
|
|
"step": 1501
|
|
},
|
|
{
|
|
"epoch": 0.7888655462184874,
|
|
"grad_norm": 13.38146211504871,
|
|
"learning_rate": 9.21540574833166e-06,
|
|
"loss": 2.6950042247772217,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 0.789390756302521,
|
|
"grad_norm": 8.810541645626765,
|
|
"learning_rate": 9.213761473026039e-06,
|
|
"loss": 3.007662773132324,
|
|
"step": 1503
|
|
},
|
|
{
|
|
"epoch": 0.7899159663865546,
|
|
"grad_norm": 24.004316030977126,
|
|
"learning_rate": 9.212115623580101e-06,
|
|
"loss": 3.302671432495117,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 0.7904411764705882,
|
|
"grad_norm": 7.767136488283772,
|
|
"learning_rate": 9.210468200608691e-06,
|
|
"loss": 2.2067933082580566,
|
|
"step": 1505
|
|
},
|
|
{
|
|
"epoch": 0.7909663865546218,
|
|
"grad_norm": 25.218202744013663,
|
|
"learning_rate": 9.208819204727236e-06,
|
|
"loss": 1.941161036491394,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 0.7914915966386554,
|
|
"grad_norm": 7.890959883648676,
|
|
"learning_rate": 9.207168636551755e-06,
|
|
"loss": 2.6079719066619873,
|
|
"step": 1507
|
|
},
|
|
{
|
|
"epoch": 0.792016806722689,
|
|
"grad_norm": 9.973775377953485,
|
|
"learning_rate": 9.205516496698854e-06,
|
|
"loss": 2.7024857997894287,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 0.7925420168067226,
|
|
"grad_norm": 6.297199469233552,
|
|
"learning_rate": 9.203862785785724e-06,
|
|
"loss": 2.8793561458587646,
|
|
"step": 1509
|
|
},
|
|
{
|
|
"epoch": 0.7930672268907563,
|
|
"grad_norm": 10.631547124866534,
|
|
"learning_rate": 9.202207504430142e-06,
|
|
"loss": 3.013770580291748,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.7935924369747899,
|
|
"grad_norm": 13.987690774179669,
|
|
"learning_rate": 9.200550653250477e-06,
|
|
"loss": 2.1392877101898193,
|
|
"step": 1511
|
|
},
|
|
{
|
|
"epoch": 0.7941176470588235,
|
|
"grad_norm": 7.284021811211223,
|
|
"learning_rate": 9.198892232865677e-06,
|
|
"loss": 2.730876922607422,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 0.7946428571428571,
|
|
"grad_norm": 18.10239370273535,
|
|
"learning_rate": 9.197232243895285e-06,
|
|
"loss": 2.7417426109313965,
|
|
"step": 1513
|
|
},
|
|
{
|
|
"epoch": 0.7951680672268907,
|
|
"grad_norm": 9.887471795104913,
|
|
"learning_rate": 9.195570686959421e-06,
|
|
"loss": 2.0390450954437256,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 0.7956932773109243,
|
|
"grad_norm": 29.397270516471195,
|
|
"learning_rate": 9.193907562678797e-06,
|
|
"loss": 3.2995693683624268,
|
|
"step": 1515
|
|
},
|
|
{
|
|
"epoch": 0.7962184873949579,
|
|
"grad_norm": 13.347163496386369,
|
|
"learning_rate": 9.192242871674708e-06,
|
|
"loss": 2.5495872497558594,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 0.7967436974789915,
|
|
"grad_norm": 11.775924365812614,
|
|
"learning_rate": 9.190576614569035e-06,
|
|
"loss": 3.3346171379089355,
|
|
"step": 1517
|
|
},
|
|
{
|
|
"epoch": 0.7972689075630253,
|
|
"grad_norm": 17.052818914961268,
|
|
"learning_rate": 9.188908791984245e-06,
|
|
"loss": 3.410426616668701,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 0.7977941176470589,
|
|
"grad_norm": 14.978064758373597,
|
|
"learning_rate": 9.187239404543387e-06,
|
|
"loss": 3.411181926727295,
|
|
"step": 1519
|
|
},
|
|
{
|
|
"epoch": 0.7983193277310925,
|
|
"grad_norm": 7.920223723765901,
|
|
"learning_rate": 9.185568452870097e-06,
|
|
"loss": 2.756586790084839,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.7988445378151261,
|
|
"grad_norm": 6.622689475441205,
|
|
"learning_rate": 9.183895937588594e-06,
|
|
"loss": 2.8143153190612793,
|
|
"step": 1521
|
|
},
|
|
{
|
|
"epoch": 0.7993697478991597,
|
|
"grad_norm": 12.922525913327187,
|
|
"learning_rate": 9.18222185932368e-06,
|
|
"loss": 2.860788106918335,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 0.7998949579831933,
|
|
"grad_norm": 7.11749063393575,
|
|
"learning_rate": 9.180546218700748e-06,
|
|
"loss": 2.6753156185150146,
|
|
"step": 1523
|
|
},
|
|
{
|
|
"epoch": 0.8004201680672269,
|
|
"grad_norm": 11.026417179632636,
|
|
"learning_rate": 9.178869016345764e-06,
|
|
"loss": 2.7370190620422363,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 0.8009453781512605,
|
|
"grad_norm": 9.530852095755332,
|
|
"learning_rate": 9.177190252885285e-06,
|
|
"loss": 2.4391965866088867,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.8014705882352942,
|
|
"grad_norm": 17.67156318578249,
|
|
"learning_rate": 9.175509928946446e-06,
|
|
"loss": 4.128862380981445,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 0.8019957983193278,
|
|
"grad_norm": 6.481976886450993,
|
|
"learning_rate": 9.173828045156971e-06,
|
|
"loss": 2.7322418689727783,
|
|
"step": 1527
|
|
},
|
|
{
|
|
"epoch": 0.8025210084033614,
|
|
"grad_norm": 7.48871876982714,
|
|
"learning_rate": 9.172144602145165e-06,
|
|
"loss": 3.071397066116333,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 0.803046218487395,
|
|
"grad_norm": 10.492401140740409,
|
|
"learning_rate": 9.17045960053991e-06,
|
|
"loss": 2.743645668029785,
|
|
"step": 1529
|
|
},
|
|
{
|
|
"epoch": 0.8035714285714286,
|
|
"grad_norm": 9.789274538067815,
|
|
"learning_rate": 9.168773040970676e-06,
|
|
"loss": 3.117504835128784,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.8040966386554622,
|
|
"grad_norm": 12.287467666758925,
|
|
"learning_rate": 9.167084924067511e-06,
|
|
"loss": 2.697328805923462,
|
|
"step": 1531
|
|
},
|
|
{
|
|
"epoch": 0.8046218487394958,
|
|
"grad_norm": 7.365104896606811,
|
|
"learning_rate": 9.165395250461051e-06,
|
|
"loss": 2.556335210800171,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 0.8051470588235294,
|
|
"grad_norm": 11.367552528006652,
|
|
"learning_rate": 9.163704020782507e-06,
|
|
"loss": 2.795506477355957,
|
|
"step": 1533
|
|
},
|
|
{
|
|
"epoch": 0.805672268907563,
|
|
"grad_norm": 11.154260289316959,
|
|
"learning_rate": 9.162011235663673e-06,
|
|
"loss": 3.1348161697387695,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 0.8061974789915967,
|
|
"grad_norm": 8.879832471703603,
|
|
"learning_rate": 9.16031689573693e-06,
|
|
"loss": 2.4550201892852783,
|
|
"step": 1535
|
|
},
|
|
{
|
|
"epoch": 0.8067226890756303,
|
|
"grad_norm": 14.317067424861127,
|
|
"learning_rate": 9.158621001635227e-06,
|
|
"loss": 2.69781494140625,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 0.8072478991596639,
|
|
"grad_norm": 10.35837871977168,
|
|
"learning_rate": 9.156923553992107e-06,
|
|
"loss": 2.9104769229888916,
|
|
"step": 1537
|
|
},
|
|
{
|
|
"epoch": 0.8077731092436975,
|
|
"grad_norm": 9.992905004281724,
|
|
"learning_rate": 9.155224553441686e-06,
|
|
"loss": 2.8530702590942383,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 0.8082983193277311,
|
|
"grad_norm": 13.675190289591084,
|
|
"learning_rate": 9.153524000618663e-06,
|
|
"loss": 3.2071030139923096,
|
|
"step": 1539
|
|
},
|
|
{
|
|
"epoch": 0.8088235294117647,
|
|
"grad_norm": 26.7946812082005,
|
|
"learning_rate": 9.151821896158314e-06,
|
|
"loss": 2.7262039184570312,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.8093487394957983,
|
|
"grad_norm": 7.218018270970506,
|
|
"learning_rate": 9.150118240696497e-06,
|
|
"loss": 2.899076461791992,
|
|
"step": 1541
|
|
},
|
|
{
|
|
"epoch": 0.8098739495798319,
|
|
"grad_norm": 14.037592531540101,
|
|
"learning_rate": 9.148413034869647e-06,
|
|
"loss": 3.045046329498291,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 0.8103991596638656,
|
|
"grad_norm": 10.852758138254117,
|
|
"learning_rate": 9.146706279314786e-06,
|
|
"loss": 2.4369218349456787,
|
|
"step": 1543
|
|
},
|
|
{
|
|
"epoch": 0.8109243697478992,
|
|
"grad_norm": 24.53162774572155,
|
|
"learning_rate": 9.144997974669501e-06,
|
|
"loss": 2.9881112575531006,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 0.8114495798319328,
|
|
"grad_norm": 8.110464226350164,
|
|
"learning_rate": 9.14328812157197e-06,
|
|
"loss": 2.4895076751708984,
|
|
"step": 1545
|
|
},
|
|
{
|
|
"epoch": 0.8119747899159664,
|
|
"grad_norm": 19.66295977199353,
|
|
"learning_rate": 9.141576720660946e-06,
|
|
"loss": 3.2586450576782227,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 0.8125,
|
|
"grad_norm": 13.08501660759478,
|
|
"learning_rate": 9.139863772575755e-06,
|
|
"loss": 3.2728970050811768,
|
|
"step": 1547
|
|
},
|
|
{
|
|
"epoch": 0.8130252100840336,
|
|
"grad_norm": 20.743029134853124,
|
|
"learning_rate": 9.138149277956307e-06,
|
|
"loss": 3.0937044620513916,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 0.8135504201680672,
|
|
"grad_norm": 14.957686663631023,
|
|
"learning_rate": 9.136433237443093e-06,
|
|
"loss": 2.2607643604278564,
|
|
"step": 1549
|
|
},
|
|
{
|
|
"epoch": 0.8140756302521008,
|
|
"grad_norm": 12.977473079137107,
|
|
"learning_rate": 9.134715651677168e-06,
|
|
"loss": 2.7575249671936035,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.8146008403361344,
|
|
"grad_norm": 8.057678458393218,
|
|
"learning_rate": 9.132996521300178e-06,
|
|
"loss": 2.641611337661743,
|
|
"step": 1551
|
|
},
|
|
{
|
|
"epoch": 0.8151260504201681,
|
|
"grad_norm": 6.814595006980323,
|
|
"learning_rate": 9.13127584695434e-06,
|
|
"loss": 2.5166702270507812,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 0.8156512605042017,
|
|
"grad_norm": 12.068766752736664,
|
|
"learning_rate": 9.129553629282448e-06,
|
|
"loss": 2.5141143798828125,
|
|
"step": 1553
|
|
},
|
|
{
|
|
"epoch": 0.8161764705882353,
|
|
"grad_norm": 11.878379327278529,
|
|
"learning_rate": 9.127829868927873e-06,
|
|
"loss": 2.772965431213379,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 0.8167016806722689,
|
|
"grad_norm": 8.460163189018663,
|
|
"learning_rate": 9.126104566534565e-06,
|
|
"loss": 2.6576623916625977,
|
|
"step": 1555
|
|
},
|
|
{
|
|
"epoch": 0.8172268907563025,
|
|
"grad_norm": 9.554429558763722,
|
|
"learning_rate": 9.124377722747041e-06,
|
|
"loss": 2.7791483402252197,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 0.8177521008403361,
|
|
"grad_norm": 12.88122105903733,
|
|
"learning_rate": 9.122649338210407e-06,
|
|
"loss": 2.890918731689453,
|
|
"step": 1557
|
|
},
|
|
{
|
|
"epoch": 0.8182773109243697,
|
|
"grad_norm": 11.189030906316328,
|
|
"learning_rate": 9.120919413570335e-06,
|
|
"loss": 2.879544734954834,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 0.8188025210084033,
|
|
"grad_norm": 6.702927071024383,
|
|
"learning_rate": 9.119187949473075e-06,
|
|
"loss": 3.0606887340545654,
|
|
"step": 1559
|
|
},
|
|
{
|
|
"epoch": 0.819327731092437,
|
|
"grad_norm": 11.643616072444471,
|
|
"learning_rate": 9.117454946565452e-06,
|
|
"loss": 3.1395931243896484,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.8198529411764706,
|
|
"grad_norm": 8.0768184982513,
|
|
"learning_rate": 9.115720405494868e-06,
|
|
"loss": 2.4597113132476807,
|
|
"step": 1561
|
|
},
|
|
{
|
|
"epoch": 0.8203781512605042,
|
|
"grad_norm": 14.236125147142554,
|
|
"learning_rate": 9.113984326909295e-06,
|
|
"loss": 2.946138381958008,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 0.8209033613445378,
|
|
"grad_norm": 16.782998241316438,
|
|
"learning_rate": 9.112246711457284e-06,
|
|
"loss": 3.0810306072235107,
|
|
"step": 1563
|
|
},
|
|
{
|
|
"epoch": 0.8214285714285714,
|
|
"grad_norm": 17.928544530056907,
|
|
"learning_rate": 9.110507559787959e-06,
|
|
"loss": 2.6110334396362305,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 0.821953781512605,
|
|
"grad_norm": 9.778722921509674,
|
|
"learning_rate": 9.108766872551016e-06,
|
|
"loss": 2.1677751541137695,
|
|
"step": 1565
|
|
},
|
|
{
|
|
"epoch": 0.8224789915966386,
|
|
"grad_norm": 17.178141544970295,
|
|
"learning_rate": 9.107024650396725e-06,
|
|
"loss": 2.8547592163085938,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 0.8230042016806722,
|
|
"grad_norm": 9.291618929323715,
|
|
"learning_rate": 9.105280893975931e-06,
|
|
"loss": 2.4209301471710205,
|
|
"step": 1567
|
|
},
|
|
{
|
|
"epoch": 0.8235294117647058,
|
|
"grad_norm": 10.98518228802169,
|
|
"learning_rate": 9.10353560394005e-06,
|
|
"loss": 2.8176021575927734,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 0.8240546218487395,
|
|
"grad_norm": 18.370639315206148,
|
|
"learning_rate": 9.101788780941076e-06,
|
|
"loss": 2.550027370452881,
|
|
"step": 1569
|
|
},
|
|
{
|
|
"epoch": 0.8245798319327731,
|
|
"grad_norm": 6.883194881280978,
|
|
"learning_rate": 9.10004042563157e-06,
|
|
"loss": 2.9312143325805664,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.8251050420168067,
|
|
"grad_norm": 8.575683307401519,
|
|
"learning_rate": 9.098290538664665e-06,
|
|
"loss": 3.1522021293640137,
|
|
"step": 1571
|
|
},
|
|
{
|
|
"epoch": 0.8256302521008403,
|
|
"grad_norm": 9.918808821214965,
|
|
"learning_rate": 9.09653912069407e-06,
|
|
"loss": 2.621941328048706,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 0.8261554621848739,
|
|
"grad_norm": 5.701315798905515,
|
|
"learning_rate": 9.094786172374066e-06,
|
|
"loss": 2.666314125061035,
|
|
"step": 1573
|
|
},
|
|
{
|
|
"epoch": 0.8266806722689075,
|
|
"grad_norm": 9.470981150117249,
|
|
"learning_rate": 9.093031694359503e-06,
|
|
"loss": 2.6916842460632324,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 0.8272058823529411,
|
|
"grad_norm": 10.999721066285408,
|
|
"learning_rate": 9.091275687305804e-06,
|
|
"loss": 3.0902199745178223,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.8277310924369747,
|
|
"grad_norm": 10.424283783181096,
|
|
"learning_rate": 9.089518151868961e-06,
|
|
"loss": 2.500002861022949,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 0.8282563025210085,
|
|
"grad_norm": 18.008637670608827,
|
|
"learning_rate": 9.087759088705541e-06,
|
|
"loss": 2.706355094909668,
|
|
"step": 1577
|
|
},
|
|
{
|
|
"epoch": 0.8287815126050421,
|
|
"grad_norm": 17.672786450639272,
|
|
"learning_rate": 9.08599849847268e-06,
|
|
"loss": 3.002948760986328,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 0.8293067226890757,
|
|
"grad_norm": 12.34897547508807,
|
|
"learning_rate": 9.08423638182808e-06,
|
|
"loss": 2.652674674987793,
|
|
"step": 1579
|
|
},
|
|
{
|
|
"epoch": 0.8298319327731093,
|
|
"grad_norm": 19.342554527070632,
|
|
"learning_rate": 9.082472739430022e-06,
|
|
"loss": 3.3123912811279297,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.8303571428571429,
|
|
"grad_norm": 13.128261899532248,
|
|
"learning_rate": 9.08070757193735e-06,
|
|
"loss": 2.7712619304656982,
|
|
"step": 1581
|
|
},
|
|
{
|
|
"epoch": 0.8308823529411765,
|
|
"grad_norm": 23.14870368573147,
|
|
"learning_rate": 9.07894088000948e-06,
|
|
"loss": 2.1335580348968506,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 0.8314075630252101,
|
|
"grad_norm": 18.28553629166134,
|
|
"learning_rate": 9.077172664306396e-06,
|
|
"loss": 2.875230312347412,
|
|
"step": 1583
|
|
},
|
|
{
|
|
"epoch": 0.8319327731092437,
|
|
"grad_norm": 10.018573350157663,
|
|
"learning_rate": 9.075402925488654e-06,
|
|
"loss": 3.035533905029297,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 0.8324579831932774,
|
|
"grad_norm": 14.89725356913745,
|
|
"learning_rate": 9.07363166421738e-06,
|
|
"loss": 2.926954507827759,
|
|
"step": 1585
|
|
},
|
|
{
|
|
"epoch": 0.832983193277311,
|
|
"grad_norm": 7.394915894853549,
|
|
"learning_rate": 9.071858881154262e-06,
|
|
"loss": 2.3858561515808105,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 0.8335084033613446,
|
|
"grad_norm": 7.845961783757571,
|
|
"learning_rate": 9.070084576961563e-06,
|
|
"loss": 2.6644482612609863,
|
|
"step": 1587
|
|
},
|
|
{
|
|
"epoch": 0.8340336134453782,
|
|
"grad_norm": 19.23898419687951,
|
|
"learning_rate": 9.068308752302113e-06,
|
|
"loss": 2.29180908203125,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 0.8345588235294118,
|
|
"grad_norm": 9.90657725166347,
|
|
"learning_rate": 9.066531407839307e-06,
|
|
"loss": 2.924722671508789,
|
|
"step": 1589
|
|
},
|
|
{
|
|
"epoch": 0.8350840336134454,
|
|
"grad_norm": 11.93098730535124,
|
|
"learning_rate": 9.06475254423711e-06,
|
|
"loss": 2.1523523330688477,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.835609243697479,
|
|
"grad_norm": 14.862881541887209,
|
|
"learning_rate": 9.062972162160058e-06,
|
|
"loss": 3.141295909881592,
|
|
"step": 1591
|
|
},
|
|
{
|
|
"epoch": 0.8361344537815126,
|
|
"grad_norm": 13.99486401192248,
|
|
"learning_rate": 9.061190262273245e-06,
|
|
"loss": 2.4941205978393555,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 0.8366596638655462,
|
|
"grad_norm": 7.333693688632184,
|
|
"learning_rate": 9.059406845242343e-06,
|
|
"loss": 2.8340518474578857,
|
|
"step": 1593
|
|
},
|
|
{
|
|
"epoch": 0.8371848739495799,
|
|
"grad_norm": 9.769839112158232,
|
|
"learning_rate": 9.057621911733581e-06,
|
|
"loss": 2.4519026279449463,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 0.8377100840336135,
|
|
"grad_norm": 20.089062803616674,
|
|
"learning_rate": 9.055835462413763e-06,
|
|
"loss": 2.875101089477539,
|
|
"step": 1595
|
|
},
|
|
{
|
|
"epoch": 0.8382352941176471,
|
|
"grad_norm": 10.346942444729434,
|
|
"learning_rate": 9.05404749795025e-06,
|
|
"loss": 2.7726926803588867,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 0.8387605042016807,
|
|
"grad_norm": 16.022987427837723,
|
|
"learning_rate": 9.05225801901098e-06,
|
|
"loss": 2.792201519012451,
|
|
"step": 1597
|
|
},
|
|
{
|
|
"epoch": 0.8392857142857143,
|
|
"grad_norm": 14.344387140750028,
|
|
"learning_rate": 9.050467026264448e-06,
|
|
"loss": 2.3667960166931152,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 0.8398109243697479,
|
|
"grad_norm": 23.523762614195864,
|
|
"learning_rate": 9.048674520379715e-06,
|
|
"loss": 2.5323946475982666,
|
|
"step": 1599
|
|
},
|
|
{
|
|
"epoch": 0.8403361344537815,
|
|
"grad_norm": 10.0204642840144,
|
|
"learning_rate": 9.046880502026414e-06,
|
|
"loss": 2.6077938079833984,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.8408613445378151,
|
|
"grad_norm": 11.639795625878955,
|
|
"learning_rate": 9.045084971874738e-06,
|
|
"loss": 2.2000503540039062,
|
|
"step": 1601
|
|
},
|
|
{
|
|
"epoch": 0.8413865546218487,
|
|
"grad_norm": 11.649591327935388,
|
|
"learning_rate": 9.043287930595444e-06,
|
|
"loss": 3.045988082885742,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 0.8419117647058824,
|
|
"grad_norm": 7.682753411560617,
|
|
"learning_rate": 9.041489378859856e-06,
|
|
"loss": 2.8375630378723145,
|
|
"step": 1603
|
|
},
|
|
{
|
|
"epoch": 0.842436974789916,
|
|
"grad_norm": 14.816083596152376,
|
|
"learning_rate": 9.039689317339861e-06,
|
|
"loss": 2.669459581375122,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 0.8429621848739496,
|
|
"grad_norm": 10.175232572354465,
|
|
"learning_rate": 9.03788774670791e-06,
|
|
"loss": 2.890617609024048,
|
|
"step": 1605
|
|
},
|
|
{
|
|
"epoch": 0.8434873949579832,
|
|
"grad_norm": 10.31357183244063,
|
|
"learning_rate": 9.036084667637018e-06,
|
|
"loss": 2.981811285018921,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 0.8440126050420168,
|
|
"grad_norm": 9.441625823542742,
|
|
"learning_rate": 9.034280080800764e-06,
|
|
"loss": 2.2013072967529297,
|
|
"step": 1607
|
|
},
|
|
{
|
|
"epoch": 0.8445378151260504,
|
|
"grad_norm": 12.058196662308841,
|
|
"learning_rate": 9.03247398687329e-06,
|
|
"loss": 2.2728679180145264,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 0.845063025210084,
|
|
"grad_norm": 8.613017389365172,
|
|
"learning_rate": 9.030666386529303e-06,
|
|
"loss": 2.5344839096069336,
|
|
"step": 1609
|
|
},
|
|
{
|
|
"epoch": 0.8455882352941176,
|
|
"grad_norm": 19.199649351118968,
|
|
"learning_rate": 9.028857280444066e-06,
|
|
"loss": 3.035724639892578,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.8461134453781513,
|
|
"grad_norm": 14.928108605529136,
|
|
"learning_rate": 9.027046669293411e-06,
|
|
"loss": 3.178629159927368,
|
|
"step": 1611
|
|
},
|
|
{
|
|
"epoch": 0.8466386554621849,
|
|
"grad_norm": 11.755087123322417,
|
|
"learning_rate": 9.025234553753733e-06,
|
|
"loss": 2.9528074264526367,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 0.8471638655462185,
|
|
"grad_norm": 6.5512554572117025,
|
|
"learning_rate": 9.023420934501981e-06,
|
|
"loss": 2.4574341773986816,
|
|
"step": 1613
|
|
},
|
|
{
|
|
"epoch": 0.8476890756302521,
|
|
"grad_norm": 6.431330019113703,
|
|
"learning_rate": 9.021605812215675e-06,
|
|
"loss": 3.0148768424987793,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 0.8482142857142857,
|
|
"grad_norm": 12.369231388742493,
|
|
"learning_rate": 9.019789187572891e-06,
|
|
"loss": 2.9979090690612793,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 0.8487394957983193,
|
|
"grad_norm": 12.324555670892899,
|
|
"learning_rate": 9.01797106125227e-06,
|
|
"loss": 2.5081629753112793,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 0.8492647058823529,
|
|
"grad_norm": 6.567047520562435,
|
|
"learning_rate": 9.01615143393301e-06,
|
|
"loss": 2.941706418991089,
|
|
"step": 1617
|
|
},
|
|
{
|
|
"epoch": 0.8497899159663865,
|
|
"grad_norm": 6.613055280987097,
|
|
"learning_rate": 9.014330306294872e-06,
|
|
"loss": 2.133114814758301,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 0.8503151260504201,
|
|
"grad_norm": 17.371922489063806,
|
|
"learning_rate": 9.012507679018177e-06,
|
|
"loss": 2.7291688919067383,
|
|
"step": 1619
|
|
},
|
|
{
|
|
"epoch": 0.8508403361344538,
|
|
"grad_norm": 8.979174214683121,
|
|
"learning_rate": 9.010683552783805e-06,
|
|
"loss": 2.460345506668091,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.8513655462184874,
|
|
"grad_norm": 13.877560062602784,
|
|
"learning_rate": 9.008857928273199e-06,
|
|
"loss": 2.723776340484619,
|
|
"step": 1621
|
|
},
|
|
{
|
|
"epoch": 0.851890756302521,
|
|
"grad_norm": 19.243310596262656,
|
|
"learning_rate": 9.00703080616836e-06,
|
|
"loss": 2.996762275695801,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 0.8524159663865546,
|
|
"grad_norm": 7.832357435651682,
|
|
"learning_rate": 9.005202187151845e-06,
|
|
"loss": 2.9319770336151123,
|
|
"step": 1623
|
|
},
|
|
{
|
|
"epoch": 0.8529411764705882,
|
|
"grad_norm": 7.262182086236559,
|
|
"learning_rate": 9.003372071906778e-06,
|
|
"loss": 2.6601133346557617,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 0.8534663865546218,
|
|
"grad_norm": 8.113081596555123,
|
|
"learning_rate": 9.001540461116835e-06,
|
|
"loss": 2.4604804515838623,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.8539915966386554,
|
|
"grad_norm": 9.882974896597206,
|
|
"learning_rate": 8.999707355466254e-06,
|
|
"loss": 2.5082545280456543,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 0.854516806722689,
|
|
"grad_norm": 22.703642831626503,
|
|
"learning_rate": 8.997872755639831e-06,
|
|
"loss": 2.6613025665283203,
|
|
"step": 1627
|
|
},
|
|
{
|
|
"epoch": 0.8550420168067226,
|
|
"grad_norm": 13.242193210134968,
|
|
"learning_rate": 8.996036662322917e-06,
|
|
"loss": 3.289824962615967,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 0.8555672268907563,
|
|
"grad_norm": 12.04027646676417,
|
|
"learning_rate": 8.994199076201428e-06,
|
|
"loss": 3.009115219116211,
|
|
"step": 1629
|
|
},
|
|
{
|
|
"epoch": 0.8560924369747899,
|
|
"grad_norm": 36.66965855685249,
|
|
"learning_rate": 8.992359997961833e-06,
|
|
"loss": 2.9523236751556396,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.8566176470588235,
|
|
"grad_norm": 11.801502784635273,
|
|
"learning_rate": 8.990519428291156e-06,
|
|
"loss": 2.5500600337982178,
|
|
"step": 1631
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"grad_norm": 7.449739602478073,
|
|
"learning_rate": 8.988677367876981e-06,
|
|
"loss": 3.536142349243164,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 0.8576680672268907,
|
|
"grad_norm": 7.682464339065975,
|
|
"learning_rate": 8.98683381740745e-06,
|
|
"loss": 2.782010316848755,
|
|
"step": 1633
|
|
},
|
|
{
|
|
"epoch": 0.8581932773109243,
|
|
"grad_norm": 6.836296703770633,
|
|
"learning_rate": 8.984988777571262e-06,
|
|
"loss": 2.6691298484802246,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 0.8587184873949579,
|
|
"grad_norm": 13.059193020003558,
|
|
"learning_rate": 8.98314224905767e-06,
|
|
"loss": 3.201601266860962,
|
|
"step": 1635
|
|
},
|
|
{
|
|
"epoch": 0.8592436974789915,
|
|
"grad_norm": 6.560365363362825,
|
|
"learning_rate": 8.981294232556484e-06,
|
|
"loss": 2.8478498458862305,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 0.8597689075630253,
|
|
"grad_norm": 12.339699916876029,
|
|
"learning_rate": 8.979444728758067e-06,
|
|
"loss": 2.8949522972106934,
|
|
"step": 1637
|
|
},
|
|
{
|
|
"epoch": 0.8602941176470589,
|
|
"grad_norm": 11.011868694472776,
|
|
"learning_rate": 8.977593738353346e-06,
|
|
"loss": 2.8054542541503906,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 0.8608193277310925,
|
|
"grad_norm": 8.218158324670545,
|
|
"learning_rate": 8.975741262033793e-06,
|
|
"loss": 2.6899096965789795,
|
|
"step": 1639
|
|
},
|
|
{
|
|
"epoch": 0.8613445378151261,
|
|
"grad_norm": 8.696709895043137,
|
|
"learning_rate": 8.973887300491442e-06,
|
|
"loss": 3.0374200344085693,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.8618697478991597,
|
|
"grad_norm": 6.921135339435363,
|
|
"learning_rate": 8.97203185441888e-06,
|
|
"loss": 2.734715700149536,
|
|
"step": 1641
|
|
},
|
|
{
|
|
"epoch": 0.8623949579831933,
|
|
"grad_norm": 12.337835404848839,
|
|
"learning_rate": 8.970174924509247e-06,
|
|
"loss": 2.811793327331543,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 0.8629201680672269,
|
|
"grad_norm": 21.3494362008051,
|
|
"learning_rate": 8.968316511456241e-06,
|
|
"loss": 2.6970551013946533,
|
|
"step": 1643
|
|
},
|
|
{
|
|
"epoch": 0.8634453781512605,
|
|
"grad_norm": 11.8210308738897,
|
|
"learning_rate": 8.966456615954112e-06,
|
|
"loss": 2.7946279048919678,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 0.8639705882352942,
|
|
"grad_norm": 23.76275781703291,
|
|
"learning_rate": 8.964595238697659e-06,
|
|
"loss": 2.2927374839782715,
|
|
"step": 1645
|
|
},
|
|
{
|
|
"epoch": 0.8644957983193278,
|
|
"grad_norm": 18.33915813125871,
|
|
"learning_rate": 8.962732380382246e-06,
|
|
"loss": 2.313857316970825,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 0.8650210084033614,
|
|
"grad_norm": 9.160965929064266,
|
|
"learning_rate": 8.960868041703777e-06,
|
|
"loss": 2.387964963912964,
|
|
"step": 1647
|
|
},
|
|
{
|
|
"epoch": 0.865546218487395,
|
|
"grad_norm": 8.598635522741514,
|
|
"learning_rate": 8.95900222335872e-06,
|
|
"loss": 2.843458890914917,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 0.8660714285714286,
|
|
"grad_norm": 9.765346338635439,
|
|
"learning_rate": 8.957134926044088e-06,
|
|
"loss": 2.3733465671539307,
|
|
"step": 1649
|
|
},
|
|
{
|
|
"epoch": 0.8665966386554622,
|
|
"grad_norm": 13.556767115272129,
|
|
"learning_rate": 8.955266150457452e-06,
|
|
"loss": 2.921518087387085,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.8671218487394958,
|
|
"grad_norm": 14.685458691050979,
|
|
"learning_rate": 8.953395897296929e-06,
|
|
"loss": 2.679130792617798,
|
|
"step": 1651
|
|
},
|
|
{
|
|
"epoch": 0.8676470588235294,
|
|
"grad_norm": 11.97178655513145,
|
|
"learning_rate": 8.951524167261197e-06,
|
|
"loss": 2.4244470596313477,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 0.868172268907563,
|
|
"grad_norm": 8.201418695356,
|
|
"learning_rate": 8.949650961049479e-06,
|
|
"loss": 2.051436185836792,
|
|
"step": 1653
|
|
},
|
|
{
|
|
"epoch": 0.8686974789915967,
|
|
"grad_norm": 14.017589373766244,
|
|
"learning_rate": 8.94777627936155e-06,
|
|
"loss": 2.378939151763916,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 0.8692226890756303,
|
|
"grad_norm": 18.71807707716699,
|
|
"learning_rate": 8.945900122897735e-06,
|
|
"loss": 3.1834826469421387,
|
|
"step": 1655
|
|
},
|
|
{
|
|
"epoch": 0.8697478991596639,
|
|
"grad_norm": 17.128359264542194,
|
|
"learning_rate": 8.944022492358917e-06,
|
|
"loss": 3.1574788093566895,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 0.8702731092436975,
|
|
"grad_norm": 6.959753715832457,
|
|
"learning_rate": 8.942143388446522e-06,
|
|
"loss": 2.0516068935394287,
|
|
"step": 1657
|
|
},
|
|
{
|
|
"epoch": 0.8707983193277311,
|
|
"grad_norm": 19.255530601752262,
|
|
"learning_rate": 8.94026281186253e-06,
|
|
"loss": 2.8824527263641357,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 0.8713235294117647,
|
|
"grad_norm": 15.055252679385482,
|
|
"learning_rate": 8.93838076330947e-06,
|
|
"loss": 2.9361789226531982,
|
|
"step": 1659
|
|
},
|
|
{
|
|
"epoch": 0.8718487394957983,
|
|
"grad_norm": 12.447043391356829,
|
|
"learning_rate": 8.936497243490422e-06,
|
|
"loss": 2.820972442626953,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.8723739495798319,
|
|
"grad_norm": 12.634858670537199,
|
|
"learning_rate": 8.934612253109017e-06,
|
|
"loss": 1.623253583908081,
|
|
"step": 1661
|
|
},
|
|
{
|
|
"epoch": 0.8728991596638656,
|
|
"grad_norm": 18.08487720174843,
|
|
"learning_rate": 8.932725792869427e-06,
|
|
"loss": 2.883774757385254,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 0.8734243697478992,
|
|
"grad_norm": 8.014012097683683,
|
|
"learning_rate": 8.930837863476386e-06,
|
|
"loss": 3.228044033050537,
|
|
"step": 1663
|
|
},
|
|
{
|
|
"epoch": 0.8739495798319328,
|
|
"grad_norm": 14.519615184017779,
|
|
"learning_rate": 8.928948465635168e-06,
|
|
"loss": 2.461923360824585,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 0.8744747899159664,
|
|
"grad_norm": 7.6317719677332425,
|
|
"learning_rate": 8.927057600051594e-06,
|
|
"loss": 3.0148236751556396,
|
|
"step": 1665
|
|
},
|
|
{
|
|
"epoch": 0.875,
|
|
"grad_norm": 13.162314049139713,
|
|
"learning_rate": 8.925165267432044e-06,
|
|
"loss": 3.121001958847046,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 0.8755252100840336,
|
|
"grad_norm": 17.913142458560134,
|
|
"learning_rate": 8.923271468483434e-06,
|
|
"loss": 2.5421323776245117,
|
|
"step": 1667
|
|
},
|
|
{
|
|
"epoch": 0.8760504201680672,
|
|
"grad_norm": 8.57593747128098,
|
|
"learning_rate": 8.921376203913235e-06,
|
|
"loss": 2.5172932147979736,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 0.8765756302521008,
|
|
"grad_norm": 15.23364369766554,
|
|
"learning_rate": 8.919479474429462e-06,
|
|
"loss": 3.1511287689208984,
|
|
"step": 1669
|
|
},
|
|
{
|
|
"epoch": 0.8771008403361344,
|
|
"grad_norm": 7.184408074539215,
|
|
"learning_rate": 8.917581280740678e-06,
|
|
"loss": 2.1041531562805176,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.8776260504201681,
|
|
"grad_norm": 13.539497022029291,
|
|
"learning_rate": 8.915681623556e-06,
|
|
"loss": 2.539247751235962,
|
|
"step": 1671
|
|
},
|
|
{
|
|
"epoch": 0.8781512605042017,
|
|
"grad_norm": 10.215448042272689,
|
|
"learning_rate": 8.913780503585076e-06,
|
|
"loss": 2.619029998779297,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 0.8786764705882353,
|
|
"grad_norm": 16.407774137063473,
|
|
"learning_rate": 8.911877921538117e-06,
|
|
"loss": 2.8678293228149414,
|
|
"step": 1673
|
|
},
|
|
{
|
|
"epoch": 0.8792016806722689,
|
|
"grad_norm": 7.616181387849795,
|
|
"learning_rate": 8.90997387812587e-06,
|
|
"loss": 2.9257354736328125,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 0.8797268907563025,
|
|
"grad_norm": 11.510999061577067,
|
|
"learning_rate": 8.90806837405963e-06,
|
|
"loss": 2.6372413635253906,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.8802521008403361,
|
|
"grad_norm": 10.154170848148102,
|
|
"learning_rate": 8.906161410051243e-06,
|
|
"loss": 2.90159273147583,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 0.8807773109243697,
|
|
"grad_norm": 23.0698419512271,
|
|
"learning_rate": 8.904252986813091e-06,
|
|
"loss": 3.2704274654388428,
|
|
"step": 1677
|
|
},
|
|
{
|
|
"epoch": 0.8813025210084033,
|
|
"grad_norm": 8.452106369556175,
|
|
"learning_rate": 8.902343105058108e-06,
|
|
"loss": 2.906270742416382,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 0.881827731092437,
|
|
"grad_norm": 19.2658822183441,
|
|
"learning_rate": 8.900431765499773e-06,
|
|
"loss": 2.535334348678589,
|
|
"step": 1679
|
|
},
|
|
{
|
|
"epoch": 0.8823529411764706,
|
|
"grad_norm": 13.918139299702606,
|
|
"learning_rate": 8.898518968852106e-06,
|
|
"loss": 2.8137190341949463,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.8828781512605042,
|
|
"grad_norm": 17.074616182965027,
|
|
"learning_rate": 8.896604715829671e-06,
|
|
"loss": 2.617612361907959,
|
|
"step": 1681
|
|
},
|
|
{
|
|
"epoch": 0.8834033613445378,
|
|
"grad_norm": 13.439043201251245,
|
|
"learning_rate": 8.89468900714758e-06,
|
|
"loss": 2.9933958053588867,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 0.8839285714285714,
|
|
"grad_norm": 9.45811011094876,
|
|
"learning_rate": 8.892771843521487e-06,
|
|
"loss": 2.933568239212036,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 0.884453781512605,
|
|
"grad_norm": 10.572139165361671,
|
|
"learning_rate": 8.890853225667588e-06,
|
|
"loss": 3.162017822265625,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 0.8849789915966386,
|
|
"grad_norm": 45.81132027610318,
|
|
"learning_rate": 8.888933154302626e-06,
|
|
"loss": 2.919490098953247,
|
|
"step": 1685
|
|
},
|
|
{
|
|
"epoch": 0.8855042016806722,
|
|
"grad_norm": 9.79457843340495,
|
|
"learning_rate": 8.887011630143881e-06,
|
|
"loss": 3.1591439247131348,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 0.8860294117647058,
|
|
"grad_norm": 32.897605463053544,
|
|
"learning_rate": 8.885088653909182e-06,
|
|
"loss": 3.183682918548584,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 0.8865546218487395,
|
|
"grad_norm": 8.911375505162102,
|
|
"learning_rate": 8.883164226316897e-06,
|
|
"loss": 2.6541295051574707,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 0.8870798319327731,
|
|
"grad_norm": 15.724794674305066,
|
|
"learning_rate": 8.881238348085936e-06,
|
|
"loss": 2.4864635467529297,
|
|
"step": 1689
|
|
},
|
|
{
|
|
"epoch": 0.8876050420168067,
|
|
"grad_norm": 10.486028943660472,
|
|
"learning_rate": 8.879311019935752e-06,
|
|
"loss": 2.7954046726226807,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.8881302521008403,
|
|
"grad_norm": 11.23597454548207,
|
|
"learning_rate": 8.877382242586341e-06,
|
|
"loss": 2.1166534423828125,
|
|
"step": 1691
|
|
},
|
|
{
|
|
"epoch": 0.8886554621848739,
|
|
"grad_norm": 21.30826530801347,
|
|
"learning_rate": 8.875452016758239e-06,
|
|
"loss": 2.9630651473999023,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 0.8891806722689075,
|
|
"grad_norm": 13.404603348241913,
|
|
"learning_rate": 8.87352034317252e-06,
|
|
"loss": 2.4266562461853027,
|
|
"step": 1693
|
|
},
|
|
{
|
|
"epoch": 0.8897058823529411,
|
|
"grad_norm": 6.502915576741012,
|
|
"learning_rate": 8.871587222550805e-06,
|
|
"loss": 2.5795416831970215,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 0.8902310924369747,
|
|
"grad_norm": 11.119334651662573,
|
|
"learning_rate": 8.86965265561525e-06,
|
|
"loss": 2.777190685272217,
|
|
"step": 1695
|
|
},
|
|
{
|
|
"epoch": 0.8907563025210085,
|
|
"grad_norm": 17.86791152944123,
|
|
"learning_rate": 8.867716643088556e-06,
|
|
"loss": 3.3912010192871094,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 0.8912815126050421,
|
|
"grad_norm": 17.06221085051761,
|
|
"learning_rate": 8.865779185693957e-06,
|
|
"loss": 2.7764787673950195,
|
|
"step": 1697
|
|
},
|
|
{
|
|
"epoch": 0.8918067226890757,
|
|
"grad_norm": 14.97933346471872,
|
|
"learning_rate": 8.863840284155238e-06,
|
|
"loss": 2.9894399642944336,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 0.8923319327731093,
|
|
"grad_norm": 22.168720702326574,
|
|
"learning_rate": 8.861899939196713e-06,
|
|
"loss": 2.7285947799682617,
|
|
"step": 1699
|
|
},
|
|
{
|
|
"epoch": 0.8928571428571429,
|
|
"grad_norm": 9.30645480741557,
|
|
"learning_rate": 8.85995815154324e-06,
|
|
"loss": 2.4073543548583984,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.8933823529411765,
|
|
"grad_norm": 24.347057275120157,
|
|
"learning_rate": 8.858014921920215e-06,
|
|
"loss": 2.8383591175079346,
|
|
"step": 1701
|
|
},
|
|
{
|
|
"epoch": 0.8939075630252101,
|
|
"grad_norm": 8.858399302051355,
|
|
"learning_rate": 8.856070251053572e-06,
|
|
"loss": 2.7161450386047363,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 0.8944327731092437,
|
|
"grad_norm": 9.047301836053926,
|
|
"learning_rate": 8.854124139669786e-06,
|
|
"loss": 3.419163465499878,
|
|
"step": 1703
|
|
},
|
|
{
|
|
"epoch": 0.8949579831932774,
|
|
"grad_norm": 13.228462933779975,
|
|
"learning_rate": 8.852176588495867e-06,
|
|
"loss": 3.4164323806762695,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 0.895483193277311,
|
|
"grad_norm": 12.974830432527467,
|
|
"learning_rate": 8.850227598259365e-06,
|
|
"loss": 2.6973154544830322,
|
|
"step": 1705
|
|
},
|
|
{
|
|
"epoch": 0.8960084033613446,
|
|
"grad_norm": 11.184728469837097,
|
|
"learning_rate": 8.848277169688367e-06,
|
|
"loss": 2.7660531997680664,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 0.8965336134453782,
|
|
"grad_norm": 14.59336775482168,
|
|
"learning_rate": 8.846325303511496e-06,
|
|
"loss": 2.886784076690674,
|
|
"step": 1707
|
|
},
|
|
{
|
|
"epoch": 0.8970588235294118,
|
|
"grad_norm": 12.71634831979801,
|
|
"learning_rate": 8.844372000457912e-06,
|
|
"loss": 2.6247849464416504,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 0.8975840336134454,
|
|
"grad_norm": 8.224274295081706,
|
|
"learning_rate": 8.842417261257316e-06,
|
|
"loss": 2.5844714641571045,
|
|
"step": 1709
|
|
},
|
|
{
|
|
"epoch": 0.898109243697479,
|
|
"grad_norm": 11.057156827735009,
|
|
"learning_rate": 8.840461086639942e-06,
|
|
"loss": 2.1547021865844727,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.8986344537815126,
|
|
"grad_norm": 6.008911221601693,
|
|
"learning_rate": 8.83850347733656e-06,
|
|
"loss": 2.193338394165039,
|
|
"step": 1711
|
|
},
|
|
{
|
|
"epoch": 0.8991596638655462,
|
|
"grad_norm": 9.797789057581019,
|
|
"learning_rate": 8.836544434078473e-06,
|
|
"loss": 2.0880467891693115,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 0.8996848739495799,
|
|
"grad_norm": 8.385600858999325,
|
|
"learning_rate": 8.83458395759753e-06,
|
|
"loss": 2.9709341526031494,
|
|
"step": 1713
|
|
},
|
|
{
|
|
"epoch": 0.9002100840336135,
|
|
"grad_norm": 11.02729520920482,
|
|
"learning_rate": 8.832622048626104e-06,
|
|
"loss": 2.6536970138549805,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 0.9007352941176471,
|
|
"grad_norm": 6.5241815816256095,
|
|
"learning_rate": 8.830658707897111e-06,
|
|
"loss": 2.5592041015625,
|
|
"step": 1715
|
|
},
|
|
{
|
|
"epoch": 0.9012605042016807,
|
|
"grad_norm": 8.289439200032332,
|
|
"learning_rate": 8.828693936143995e-06,
|
|
"loss": 2.1298305988311768,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 0.9017857142857143,
|
|
"grad_norm": 9.131984002984787,
|
|
"learning_rate": 8.826727734100742e-06,
|
|
"loss": 2.1312835216522217,
|
|
"step": 1717
|
|
},
|
|
{
|
|
"epoch": 0.9023109243697479,
|
|
"grad_norm": 9.74110691329254,
|
|
"learning_rate": 8.824760102501865e-06,
|
|
"loss": 2.2512333393096924,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 0.9028361344537815,
|
|
"grad_norm": 19.930437776451477,
|
|
"learning_rate": 8.82279104208242e-06,
|
|
"loss": 2.780616521835327,
|
|
"step": 1719
|
|
},
|
|
{
|
|
"epoch": 0.9033613445378151,
|
|
"grad_norm": 15.732270777879222,
|
|
"learning_rate": 8.820820553577985e-06,
|
|
"loss": 2.844557046890259,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.9038865546218487,
|
|
"grad_norm": 11.653246048171543,
|
|
"learning_rate": 8.818848637724681e-06,
|
|
"loss": 2.871880054473877,
|
|
"step": 1721
|
|
},
|
|
{
|
|
"epoch": 0.9044117647058824,
|
|
"grad_norm": 22.783994398798196,
|
|
"learning_rate": 8.816875295259162e-06,
|
|
"loss": 2.7712903022766113,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 0.904936974789916,
|
|
"grad_norm": 14.66931092739854,
|
|
"learning_rate": 8.814900526918608e-06,
|
|
"loss": 2.664705753326416,
|
|
"step": 1723
|
|
},
|
|
{
|
|
"epoch": 0.9054621848739496,
|
|
"grad_norm": 13.507599300840212,
|
|
"learning_rate": 8.812924333440736e-06,
|
|
"loss": 2.4884164333343506,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 0.9059873949579832,
|
|
"grad_norm": 8.016607953026961,
|
|
"learning_rate": 8.810946715563798e-06,
|
|
"loss": 2.5067238807678223,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.9065126050420168,
|
|
"grad_norm": 9.126188841154358,
|
|
"learning_rate": 8.808967674026572e-06,
|
|
"loss": 2.2832911014556885,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 0.9070378151260504,
|
|
"grad_norm": 9.858863483209218,
|
|
"learning_rate": 8.806987209568372e-06,
|
|
"loss": 2.6985623836517334,
|
|
"step": 1727
|
|
},
|
|
{
|
|
"epoch": 0.907563025210084,
|
|
"grad_norm": 10.771258049018208,
|
|
"learning_rate": 8.805005322929046e-06,
|
|
"loss": 1.8074414730072021,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 0.9080882352941176,
|
|
"grad_norm": 21.656788517948772,
|
|
"learning_rate": 8.803022014848966e-06,
|
|
"loss": 2.817106246948242,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 0.9086134453781513,
|
|
"grad_norm": 14.257204034945724,
|
|
"learning_rate": 8.80103728606904e-06,
|
|
"loss": 2.712790012359619,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.9091386554621849,
|
|
"grad_norm": 16.62495737178655,
|
|
"learning_rate": 8.799051137330705e-06,
|
|
"loss": 3.398012161254883,
|
|
"step": 1731
|
|
},
|
|
{
|
|
"epoch": 0.9096638655462185,
|
|
"grad_norm": 13.059757449688918,
|
|
"learning_rate": 8.79706356937593e-06,
|
|
"loss": 3.0172595977783203,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 0.9101890756302521,
|
|
"grad_norm": 10.693082411575082,
|
|
"learning_rate": 8.795074582947214e-06,
|
|
"loss": 2.965136766433716,
|
|
"step": 1733
|
|
},
|
|
{
|
|
"epoch": 0.9107142857142857,
|
|
"grad_norm": 9.40965957844002,
|
|
"learning_rate": 8.793084178787586e-06,
|
|
"loss": 2.7919960021972656,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 0.9112394957983193,
|
|
"grad_norm": 13.491129075692312,
|
|
"learning_rate": 8.7910923576406e-06,
|
|
"loss": 2.837684392929077,
|
|
"step": 1735
|
|
},
|
|
{
|
|
"epoch": 0.9117647058823529,
|
|
"grad_norm": 8.610810495422943,
|
|
"learning_rate": 8.789099120250346e-06,
|
|
"loss": 2.698582172393799,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 0.9122899159663865,
|
|
"grad_norm": 14.92563382940718,
|
|
"learning_rate": 8.787104467361442e-06,
|
|
"loss": 2.46994686126709,
|
|
"step": 1737
|
|
},
|
|
{
|
|
"epoch": 0.9128151260504201,
|
|
"grad_norm": 18.577083028769557,
|
|
"learning_rate": 8.785108399719029e-06,
|
|
"loss": 2.800351619720459,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 0.9133403361344538,
|
|
"grad_norm": 13.499774219775034,
|
|
"learning_rate": 8.783110918068784e-06,
|
|
"loss": 2.3970227241516113,
|
|
"step": 1739
|
|
},
|
|
{
|
|
"epoch": 0.9138655462184874,
|
|
"grad_norm": 5.99452123879766,
|
|
"learning_rate": 8.781112023156906e-06,
|
|
"loss": 2.9113054275512695,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.914390756302521,
|
|
"grad_norm": 8.66218967429379,
|
|
"learning_rate": 8.779111715730127e-06,
|
|
"loss": 2.531561851501465,
|
|
"step": 1741
|
|
},
|
|
{
|
|
"epoch": 0.9149159663865546,
|
|
"grad_norm": 12.395263930738281,
|
|
"learning_rate": 8.777109996535701e-06,
|
|
"loss": 2.794752597808838,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 0.9154411764705882,
|
|
"grad_norm": 16.55794421118958,
|
|
"learning_rate": 8.775106866321419e-06,
|
|
"loss": 2.6315114498138428,
|
|
"step": 1743
|
|
},
|
|
{
|
|
"epoch": 0.9159663865546218,
|
|
"grad_norm": 12.322031152831002,
|
|
"learning_rate": 8.773102325835587e-06,
|
|
"loss": 3.066227674484253,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 0.9164915966386554,
|
|
"grad_norm": 14.013293060084008,
|
|
"learning_rate": 8.771096375827047e-06,
|
|
"loss": 3.158912181854248,
|
|
"step": 1745
|
|
},
|
|
{
|
|
"epoch": 0.917016806722689,
|
|
"grad_norm": 9.40695823246611,
|
|
"learning_rate": 8.769089017045163e-06,
|
|
"loss": 3.091387987136841,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 0.9175420168067226,
|
|
"grad_norm": 7.715469404017814,
|
|
"learning_rate": 8.767080250239826e-06,
|
|
"loss": 2.7237539291381836,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 0.9180672268907563,
|
|
"grad_norm": 14.007152391202004,
|
|
"learning_rate": 8.765070076161457e-06,
|
|
"loss": 2.5782065391540527,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 0.9185924369747899,
|
|
"grad_norm": 16.994065017834338,
|
|
"learning_rate": 8.763058495560994e-06,
|
|
"loss": 2.9540176391601562,
|
|
"step": 1749
|
|
},
|
|
{
|
|
"epoch": 0.9191176470588235,
|
|
"grad_norm": 11.371718126473317,
|
|
"learning_rate": 8.761045509189912e-06,
|
|
"loss": 3.109292507171631,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.9196428571428571,
|
|
"grad_norm": 17.264147344626977,
|
|
"learning_rate": 8.7590311178002e-06,
|
|
"loss": 3.1204617023468018,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 0.9201680672268907,
|
|
"grad_norm": 6.1149940045501525,
|
|
"learning_rate": 8.757015322144377e-06,
|
|
"loss": 2.7700257301330566,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 0.9206932773109243,
|
|
"grad_norm": 21.35446981291788,
|
|
"learning_rate": 8.754998122975489e-06,
|
|
"loss": 2.498889684677124,
|
|
"step": 1753
|
|
},
|
|
{
|
|
"epoch": 0.9212184873949579,
|
|
"grad_norm": 13.79860645915577,
|
|
"learning_rate": 8.752979521047103e-06,
|
|
"loss": 2.9995710849761963,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 0.9217436974789915,
|
|
"grad_norm": 16.40276401841837,
|
|
"learning_rate": 8.750959517113309e-06,
|
|
"loss": 2.588589668273926,
|
|
"step": 1755
|
|
},
|
|
{
|
|
"epoch": 0.9222689075630253,
|
|
"grad_norm": 15.780013826258518,
|
|
"learning_rate": 8.748938111928723e-06,
|
|
"loss": 2.793461322784424,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 0.9227941176470589,
|
|
"grad_norm": 14.078165717018452,
|
|
"learning_rate": 8.746915306248488e-06,
|
|
"loss": 2.1291205883026123,
|
|
"step": 1757
|
|
},
|
|
{
|
|
"epoch": 0.9233193277310925,
|
|
"grad_norm": 17.000730244844867,
|
|
"learning_rate": 8.744891100828261e-06,
|
|
"loss": 3.3476784229278564,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 0.9238445378151261,
|
|
"grad_norm": 21.73777257059286,
|
|
"learning_rate": 8.742865496424228e-06,
|
|
"loss": 3.958864212036133,
|
|
"step": 1759
|
|
},
|
|
{
|
|
"epoch": 0.9243697478991597,
|
|
"grad_norm": 8.904155981796297,
|
|
"learning_rate": 8.740838493793098e-06,
|
|
"loss": 2.3914618492126465,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.9248949579831933,
|
|
"grad_norm": 8.28742029991206,
|
|
"learning_rate": 8.7388100936921e-06,
|
|
"loss": 2.660982847213745,
|
|
"step": 1761
|
|
},
|
|
{
|
|
"epoch": 0.9254201680672269,
|
|
"grad_norm": 8.878507569356453,
|
|
"learning_rate": 8.736780296878988e-06,
|
|
"loss": 2.3615212440490723,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 0.9259453781512605,
|
|
"grad_norm": 11.62026842833458,
|
|
"learning_rate": 8.734749104112032e-06,
|
|
"loss": 3.145231008529663,
|
|
"step": 1763
|
|
},
|
|
{
|
|
"epoch": 0.9264705882352942,
|
|
"grad_norm": 8.896661371514389,
|
|
"learning_rate": 8.732716516150032e-06,
|
|
"loss": 2.599561929702759,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 0.9269957983193278,
|
|
"grad_norm": 10.29556865582247,
|
|
"learning_rate": 8.730682533752301e-06,
|
|
"loss": 2.501940965652466,
|
|
"step": 1765
|
|
},
|
|
{
|
|
"epoch": 0.9275210084033614,
|
|
"grad_norm": 10.101323750608474,
|
|
"learning_rate": 8.728647157678678e-06,
|
|
"loss": 2.709500789642334,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 0.928046218487395,
|
|
"grad_norm": 8.644139455011867,
|
|
"learning_rate": 8.72661038868952e-06,
|
|
"loss": 2.2276365756988525,
|
|
"step": 1767
|
|
},
|
|
{
|
|
"epoch": 0.9285714285714286,
|
|
"grad_norm": 6.485331202539536,
|
|
"learning_rate": 8.724572227545707e-06,
|
|
"loss": 2.9125149250030518,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 0.9290966386554622,
|
|
"grad_norm": 11.062385891642846,
|
|
"learning_rate": 8.722532675008635e-06,
|
|
"loss": 2.6444571018218994,
|
|
"step": 1769
|
|
},
|
|
{
|
|
"epoch": 0.9296218487394958,
|
|
"grad_norm": 8.506277266884535,
|
|
"learning_rate": 8.720491731840223e-06,
|
|
"loss": 2.7320895195007324,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.9301470588235294,
|
|
"grad_norm": 20.332919090775746,
|
|
"learning_rate": 8.718449398802914e-06,
|
|
"loss": 2.454099416732788,
|
|
"step": 1771
|
|
},
|
|
{
|
|
"epoch": 0.930672268907563,
|
|
"grad_norm": 11.21722087123936,
|
|
"learning_rate": 8.716405676659656e-06,
|
|
"loss": 3.093299388885498,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 0.9311974789915967,
|
|
"grad_norm": 13.852780182447871,
|
|
"learning_rate": 8.714360566173932e-06,
|
|
"loss": 2.987112045288086,
|
|
"step": 1773
|
|
},
|
|
{
|
|
"epoch": 0.9317226890756303,
|
|
"grad_norm": 8.142773274599534,
|
|
"learning_rate": 8.712314068109732e-06,
|
|
"loss": 3.0356099605560303,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 0.9322478991596639,
|
|
"grad_norm": 15.95072720515174,
|
|
"learning_rate": 8.710266183231574e-06,
|
|
"loss": 3.1858644485473633,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.9327731092436975,
|
|
"grad_norm": 35.115395441844655,
|
|
"learning_rate": 8.708216912304484e-06,
|
|
"loss": 2.502962827682495,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 0.9332983193277311,
|
|
"grad_norm": 13.547142890784473,
|
|
"learning_rate": 8.706166256094013e-06,
|
|
"loss": 3.0994725227355957,
|
|
"step": 1777
|
|
},
|
|
{
|
|
"epoch": 0.9338235294117647,
|
|
"grad_norm": 14.441962799616894,
|
|
"learning_rate": 8.704114215366228e-06,
|
|
"loss": 2.9976320266723633,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 0.9343487394957983,
|
|
"grad_norm": 11.323843563316196,
|
|
"learning_rate": 8.70206079088771e-06,
|
|
"loss": 2.6001384258270264,
|
|
"step": 1779
|
|
},
|
|
{
|
|
"epoch": 0.9348739495798319,
|
|
"grad_norm": 14.418821458631701,
|
|
"learning_rate": 8.700005983425562e-06,
|
|
"loss": 2.52199649810791,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.9353991596638656,
|
|
"grad_norm": 15.704931815547031,
|
|
"learning_rate": 8.6979497937474e-06,
|
|
"loss": 2.6769728660583496,
|
|
"step": 1781
|
|
},
|
|
{
|
|
"epoch": 0.9359243697478992,
|
|
"grad_norm": 10.456791049827286,
|
|
"learning_rate": 8.695892222621359e-06,
|
|
"loss": 2.4125683307647705,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 0.9364495798319328,
|
|
"grad_norm": 19.293268901498354,
|
|
"learning_rate": 8.693833270816083e-06,
|
|
"loss": 2.435770034790039,
|
|
"step": 1783
|
|
},
|
|
{
|
|
"epoch": 0.9369747899159664,
|
|
"grad_norm": 9.749255660128235,
|
|
"learning_rate": 8.691772939100745e-06,
|
|
"loss": 2.727022409439087,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 0.9375,
|
|
"grad_norm": 12.592670289014633,
|
|
"learning_rate": 8.689711228245021e-06,
|
|
"loss": 2.8270492553710938,
|
|
"step": 1785
|
|
},
|
|
{
|
|
"epoch": 0.9380252100840336,
|
|
"grad_norm": 10.547352093485244,
|
|
"learning_rate": 8.687648139019107e-06,
|
|
"loss": 2.904310464859009,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 0.9385504201680672,
|
|
"grad_norm": 7.256107660315933,
|
|
"learning_rate": 8.685583672193716e-06,
|
|
"loss": 2.8405511379241943,
|
|
"step": 1787
|
|
},
|
|
{
|
|
"epoch": 0.9390756302521008,
|
|
"grad_norm": 38.87294542436356,
|
|
"learning_rate": 8.683517828540074e-06,
|
|
"loss": 2.3819591999053955,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 0.9396008403361344,
|
|
"grad_norm": 8.689467624525195,
|
|
"learning_rate": 8.681450608829916e-06,
|
|
"loss": 2.5236239433288574,
|
|
"step": 1789
|
|
},
|
|
{
|
|
"epoch": 0.9401260504201681,
|
|
"grad_norm": 14.207355681187238,
|
|
"learning_rate": 8.679382013835502e-06,
|
|
"loss": 2.255479097366333,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.9406512605042017,
|
|
"grad_norm": 9.98703450472335,
|
|
"learning_rate": 8.677312044329595e-06,
|
|
"loss": 2.883563756942749,
|
|
"step": 1791
|
|
},
|
|
{
|
|
"epoch": 0.9411764705882353,
|
|
"grad_norm": 8.386518263299669,
|
|
"learning_rate": 8.675240701085481e-06,
|
|
"loss": 2.463327407836914,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 0.9417016806722689,
|
|
"grad_norm": 12.831345621892083,
|
|
"learning_rate": 8.67316798487695e-06,
|
|
"loss": 3.131563663482666,
|
|
"step": 1793
|
|
},
|
|
{
|
|
"epoch": 0.9422268907563025,
|
|
"grad_norm": 13.046653678099416,
|
|
"learning_rate": 8.67109389647831e-06,
|
|
"loss": 3.0421652793884277,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 0.9427521008403361,
|
|
"grad_norm": 17.84912520763003,
|
|
"learning_rate": 8.669018436664382e-06,
|
|
"loss": 2.6702780723571777,
|
|
"step": 1795
|
|
},
|
|
{
|
|
"epoch": 0.9432773109243697,
|
|
"grad_norm": 7.9071114232742215,
|
|
"learning_rate": 8.6669416062105e-06,
|
|
"loss": 2.7219574451446533,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 0.9438025210084033,
|
|
"grad_norm": 15.392730609681045,
|
|
"learning_rate": 8.664863405892506e-06,
|
|
"loss": 2.210427761077881,
|
|
"step": 1797
|
|
},
|
|
{
|
|
"epoch": 0.944327731092437,
|
|
"grad_norm": 7.982180317670834,
|
|
"learning_rate": 8.662783836486751e-06,
|
|
"loss": 2.505744457244873,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 0.9448529411764706,
|
|
"grad_norm": 7.536379874194175,
|
|
"learning_rate": 8.660702898770113e-06,
|
|
"loss": 2.801051378250122,
|
|
"step": 1799
|
|
},
|
|
{
|
|
"epoch": 0.9453781512605042,
|
|
"grad_norm": 13.524087297376317,
|
|
"learning_rate": 8.658620593519964e-06,
|
|
"loss": 3.414421796798706,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.9459033613445378,
|
|
"grad_norm": 12.612533189965147,
|
|
"learning_rate": 8.656536921514195e-06,
|
|
"loss": 2.69132924079895,
|
|
"step": 1801
|
|
},
|
|
{
|
|
"epoch": 0.9464285714285714,
|
|
"grad_norm": 36.07657582307637,
|
|
"learning_rate": 8.654451883531205e-06,
|
|
"loss": 2.282496690750122,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 0.946953781512605,
|
|
"grad_norm": 11.902700522909452,
|
|
"learning_rate": 8.652365480349904e-06,
|
|
"loss": 2.824908971786499,
|
|
"step": 1803
|
|
},
|
|
{
|
|
"epoch": 0.9474789915966386,
|
|
"grad_norm": 35.3183446640842,
|
|
"learning_rate": 8.650277712749715e-06,
|
|
"loss": 3.2178330421447754,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 0.9480042016806722,
|
|
"grad_norm": 8.091882541854254,
|
|
"learning_rate": 8.648188581510567e-06,
|
|
"loss": 2.548964023590088,
|
|
"step": 1805
|
|
},
|
|
{
|
|
"epoch": 0.9485294117647058,
|
|
"grad_norm": 10.806709211954933,
|
|
"learning_rate": 8.646098087412897e-06,
|
|
"loss": 2.827819347381592,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 0.9490546218487395,
|
|
"grad_norm": 14.81539601197576,
|
|
"learning_rate": 8.644006231237655e-06,
|
|
"loss": 2.643984317779541,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 0.9495798319327731,
|
|
"grad_norm": 11.05488678125289,
|
|
"learning_rate": 8.641913013766301e-06,
|
|
"loss": 2.810927629470825,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 0.9501050420168067,
|
|
"grad_norm": 16.88559157091052,
|
|
"learning_rate": 8.639818435780797e-06,
|
|
"loss": 2.7036876678466797,
|
|
"step": 1809
|
|
},
|
|
{
|
|
"epoch": 0.9506302521008403,
|
|
"grad_norm": 12.332844750129505,
|
|
"learning_rate": 8.637722498063619e-06,
|
|
"loss": 3.2361364364624023,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.9511554621848739,
|
|
"grad_norm": 9.568169116029864,
|
|
"learning_rate": 8.63562520139775e-06,
|
|
"loss": 2.702150821685791,
|
|
"step": 1811
|
|
},
|
|
{
|
|
"epoch": 0.9516806722689075,
|
|
"grad_norm": 14.354870471053436,
|
|
"learning_rate": 8.63352654656668e-06,
|
|
"loss": 3.1212148666381836,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 0.9522058823529411,
|
|
"grad_norm": 14.82901426364795,
|
|
"learning_rate": 8.631426534354404e-06,
|
|
"loss": 3.0510215759277344,
|
|
"step": 1813
|
|
},
|
|
{
|
|
"epoch": 0.9527310924369747,
|
|
"grad_norm": 7.052700426308374,
|
|
"learning_rate": 8.629325165545426e-06,
|
|
"loss": 2.7621378898620605,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 0.9532563025210085,
|
|
"grad_norm": 10.181466227394393,
|
|
"learning_rate": 8.62722244092476e-06,
|
|
"loss": 2.6987380981445312,
|
|
"step": 1815
|
|
},
|
|
{
|
|
"epoch": 0.9537815126050421,
|
|
"grad_norm": 7.379642276446604,
|
|
"learning_rate": 8.625118361277921e-06,
|
|
"loss": 2.4137468338012695,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 0.9543067226890757,
|
|
"grad_norm": 9.966511020909078,
|
|
"learning_rate": 8.623012927390936e-06,
|
|
"loss": 2.141328811645508,
|
|
"step": 1817
|
|
},
|
|
{
|
|
"epoch": 0.9548319327731093,
|
|
"grad_norm": 18.90858550355101,
|
|
"learning_rate": 8.620906140050332e-06,
|
|
"loss": 2.9326627254486084,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 0.9553571428571429,
|
|
"grad_norm": 9.905485378163771,
|
|
"learning_rate": 8.618798000043142e-06,
|
|
"loss": 3.3081917762756348,
|
|
"step": 1819
|
|
},
|
|
{
|
|
"epoch": 0.9558823529411765,
|
|
"grad_norm": 8.826804118335973,
|
|
"learning_rate": 8.616688508156912e-06,
|
|
"loss": 2.9522790908813477,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.9564075630252101,
|
|
"grad_norm": 27.27844035411373,
|
|
"learning_rate": 8.614577665179684e-06,
|
|
"loss": 2.997037887573242,
|
|
"step": 1821
|
|
},
|
|
{
|
|
"epoch": 0.9569327731092437,
|
|
"grad_norm": 9.101041023188024,
|
|
"learning_rate": 8.61246547190001e-06,
|
|
"loss": 2.8007779121398926,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 0.9574579831932774,
|
|
"grad_norm": 5.045936465679974,
|
|
"learning_rate": 8.610351929106944e-06,
|
|
"loss": 2.9660260677337646,
|
|
"step": 1823
|
|
},
|
|
{
|
|
"epoch": 0.957983193277311,
|
|
"grad_norm": 9.841308592528314,
|
|
"learning_rate": 8.608237037590044e-06,
|
|
"loss": 2.7075157165527344,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 0.9585084033613446,
|
|
"grad_norm": 13.276096738014182,
|
|
"learning_rate": 8.606120798139375e-06,
|
|
"loss": 2.6712965965270996,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.9590336134453782,
|
|
"grad_norm": 15.612695105516615,
|
|
"learning_rate": 8.6040032115455e-06,
|
|
"loss": 2.821986198425293,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 0.9595588235294118,
|
|
"grad_norm": 13.353716065276132,
|
|
"learning_rate": 8.601884278599493e-06,
|
|
"loss": 2.9493420124053955,
|
|
"step": 1827
|
|
},
|
|
{
|
|
"epoch": 0.9600840336134454,
|
|
"grad_norm": 7.912159385957208,
|
|
"learning_rate": 8.599764000092921e-06,
|
|
"loss": 2.0210976600646973,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 0.960609243697479,
|
|
"grad_norm": 8.04791893105041,
|
|
"learning_rate": 8.597642376817865e-06,
|
|
"loss": 2.616459846496582,
|
|
"step": 1829
|
|
},
|
|
{
|
|
"epoch": 0.9611344537815126,
|
|
"grad_norm": 18.59918151216922,
|
|
"learning_rate": 8.5955194095669e-06,
|
|
"loss": 2.24114990234375,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.9616596638655462,
|
|
"grad_norm": 7.616087818821263,
|
|
"learning_rate": 8.593395099133103e-06,
|
|
"loss": 2.514791488647461,
|
|
"step": 1831
|
|
},
|
|
{
|
|
"epoch": 0.9621848739495799,
|
|
"grad_norm": 11.382891052560675,
|
|
"learning_rate": 8.59126944631006e-06,
|
|
"loss": 2.959714651107788,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 0.9627100840336135,
|
|
"grad_norm": 16.938581937412387,
|
|
"learning_rate": 8.589142451891849e-06,
|
|
"loss": 2.572869300842285,
|
|
"step": 1833
|
|
},
|
|
{
|
|
"epoch": 0.9632352941176471,
|
|
"grad_norm": 12.795502342015439,
|
|
"learning_rate": 8.58701411667306e-06,
|
|
"loss": 2.787219524383545,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 0.9637605042016807,
|
|
"grad_norm": 6.498791139312737,
|
|
"learning_rate": 8.584884441448774e-06,
|
|
"loss": 2.691793203353882,
|
|
"step": 1835
|
|
},
|
|
{
|
|
"epoch": 0.9642857142857143,
|
|
"grad_norm": 9.821994066379503,
|
|
"learning_rate": 8.582753427014576e-06,
|
|
"loss": 2.721123695373535,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 0.9648109243697479,
|
|
"grad_norm": 42.870174084996364,
|
|
"learning_rate": 8.580621074166553e-06,
|
|
"loss": 3.4184532165527344,
|
|
"step": 1837
|
|
},
|
|
{
|
|
"epoch": 0.9653361344537815,
|
|
"grad_norm": 6.494893254965669,
|
|
"learning_rate": 8.57848738370129e-06,
|
|
"loss": 2.405518054962158,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 0.9658613445378151,
|
|
"grad_norm": 6.284528323440992,
|
|
"learning_rate": 8.576352356415876e-06,
|
|
"loss": 2.6421608924865723,
|
|
"step": 1839
|
|
},
|
|
{
|
|
"epoch": 0.9663865546218487,
|
|
"grad_norm": 7.965027483222569,
|
|
"learning_rate": 8.574215993107892e-06,
|
|
"loss": 2.546196460723877,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.9669117647058824,
|
|
"grad_norm": 13.46452882920774,
|
|
"learning_rate": 8.572078294575423e-06,
|
|
"loss": 3.115414619445801,
|
|
"step": 1841
|
|
},
|
|
{
|
|
"epoch": 0.967436974789916,
|
|
"grad_norm": 13.818032339998968,
|
|
"learning_rate": 8.569939261617052e-06,
|
|
"loss": 2.332549571990967,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 0.9679621848739496,
|
|
"grad_norm": 12.092965501437327,
|
|
"learning_rate": 8.56779889503186e-06,
|
|
"loss": 2.978212594985962,
|
|
"step": 1843
|
|
},
|
|
{
|
|
"epoch": 0.9684873949579832,
|
|
"grad_norm": 5.963088828749137,
|
|
"learning_rate": 8.565657195619427e-06,
|
|
"loss": 2.8651227951049805,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 0.9690126050420168,
|
|
"grad_norm": 14.825238184496126,
|
|
"learning_rate": 8.56351416417983e-06,
|
|
"loss": 2.586548089981079,
|
|
"step": 1845
|
|
},
|
|
{
|
|
"epoch": 0.9695378151260504,
|
|
"grad_norm": 10.80124055660655,
|
|
"learning_rate": 8.561369801513647e-06,
|
|
"loss": 2.636096715927124,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 0.970063025210084,
|
|
"grad_norm": 9.325636596277027,
|
|
"learning_rate": 8.559224108421943e-06,
|
|
"loss": 3.1254000663757324,
|
|
"step": 1847
|
|
},
|
|
{
|
|
"epoch": 0.9705882352941176,
|
|
"grad_norm": 12.346374533301125,
|
|
"learning_rate": 8.557077085706295e-06,
|
|
"loss": 2.6923446655273438,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 0.9711134453781513,
|
|
"grad_norm": 12.07670082141277,
|
|
"learning_rate": 8.554928734168767e-06,
|
|
"loss": 2.7790634632110596,
|
|
"step": 1849
|
|
},
|
|
{
|
|
"epoch": 0.9716386554621849,
|
|
"grad_norm": 12.531202975668133,
|
|
"learning_rate": 8.552779054611917e-06,
|
|
"loss": 2.7498366832733154,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.9721638655462185,
|
|
"grad_norm": 12.654364630281512,
|
|
"learning_rate": 8.550628047838809e-06,
|
|
"loss": 3.2891016006469727,
|
|
"step": 1851
|
|
},
|
|
{
|
|
"epoch": 0.9726890756302521,
|
|
"grad_norm": 30.21970287149626,
|
|
"learning_rate": 8.548475714652997e-06,
|
|
"loss": 3.007063388824463,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 0.9732142857142857,
|
|
"grad_norm": 19.707323009878657,
|
|
"learning_rate": 8.546322055858526e-06,
|
|
"loss": 2.864422082901001,
|
|
"step": 1853
|
|
},
|
|
{
|
|
"epoch": 0.9737394957983193,
|
|
"grad_norm": 12.231623007554356,
|
|
"learning_rate": 8.544167072259947e-06,
|
|
"loss": 2.7920737266540527,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 0.9742647058823529,
|
|
"grad_norm": 16.267777169718915,
|
|
"learning_rate": 8.542010764662296e-06,
|
|
"loss": 3.611020088195801,
|
|
"step": 1855
|
|
},
|
|
{
|
|
"epoch": 0.9747899159663865,
|
|
"grad_norm": 12.559867699834967,
|
|
"learning_rate": 8.53985313387111e-06,
|
|
"loss": 2.9485042095184326,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 0.9753151260504201,
|
|
"grad_norm": 9.655644767699465,
|
|
"learning_rate": 8.537694180692416e-06,
|
|
"loss": 2.939473867416382,
|
|
"step": 1857
|
|
},
|
|
{
|
|
"epoch": 0.9758403361344538,
|
|
"grad_norm": 9.236413649563215,
|
|
"learning_rate": 8.535533905932739e-06,
|
|
"loss": 2.4414329528808594,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 0.9763655462184874,
|
|
"grad_norm": 12.901378568621737,
|
|
"learning_rate": 8.533372310399093e-06,
|
|
"loss": 2.878749132156372,
|
|
"step": 1859
|
|
},
|
|
{
|
|
"epoch": 0.976890756302521,
|
|
"grad_norm": 16.44025528982554,
|
|
"learning_rate": 8.53120939489899e-06,
|
|
"loss": 2.847072124481201,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.9774159663865546,
|
|
"grad_norm": 10.90208461014904,
|
|
"learning_rate": 8.529045160240433e-06,
|
|
"loss": 3.040494918823242,
|
|
"step": 1861
|
|
},
|
|
{
|
|
"epoch": 0.9779411764705882,
|
|
"grad_norm": 15.379610602130203,
|
|
"learning_rate": 8.526879607231917e-06,
|
|
"loss": 2.0407490730285645,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 0.9784663865546218,
|
|
"grad_norm": 10.067268557400256,
|
|
"learning_rate": 8.524712736682433e-06,
|
|
"loss": 2.9385571479797363,
|
|
"step": 1863
|
|
},
|
|
{
|
|
"epoch": 0.9789915966386554,
|
|
"grad_norm": 22.4557381152531,
|
|
"learning_rate": 8.522544549401457e-06,
|
|
"loss": 3.2371599674224854,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 0.979516806722689,
|
|
"grad_norm": 10.521846568231593,
|
|
"learning_rate": 8.520375046198965e-06,
|
|
"loss": 2.6711273193359375,
|
|
"step": 1865
|
|
},
|
|
{
|
|
"epoch": 0.9800420168067226,
|
|
"grad_norm": 10.941139523626964,
|
|
"learning_rate": 8.518204227885422e-06,
|
|
"loss": 2.5425753593444824,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 0.9805672268907563,
|
|
"grad_norm": 11.628589322299094,
|
|
"learning_rate": 8.51603209527178e-06,
|
|
"loss": 3.3113784790039062,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 0.9810924369747899,
|
|
"grad_norm": 16.58473894220331,
|
|
"learning_rate": 8.513858649169488e-06,
|
|
"loss": 3.3791017532348633,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 0.9816176470588235,
|
|
"grad_norm": 21.68638848570282,
|
|
"learning_rate": 8.51168389039048e-06,
|
|
"loss": 2.6426897048950195,
|
|
"step": 1869
|
|
},
|
|
{
|
|
"epoch": 0.9821428571428571,
|
|
"grad_norm": 8.14627549839803,
|
|
"learning_rate": 8.50950781974719e-06,
|
|
"loss": 2.903141736984253,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.9826680672268907,
|
|
"grad_norm": 10.422338501355247,
|
|
"learning_rate": 8.507330438052527e-06,
|
|
"loss": 2.8371524810791016,
|
|
"step": 1871
|
|
},
|
|
{
|
|
"epoch": 0.9831932773109243,
|
|
"grad_norm": 9.94067313746645,
|
|
"learning_rate": 8.505151746119904e-06,
|
|
"loss": 3.0765295028686523,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 0.9837184873949579,
|
|
"grad_norm": 11.063333204420756,
|
|
"learning_rate": 8.502971744763216e-06,
|
|
"loss": 2.120239019393921,
|
|
"step": 1873
|
|
},
|
|
{
|
|
"epoch": 0.9842436974789915,
|
|
"grad_norm": 11.041822515498248,
|
|
"learning_rate": 8.500790434796848e-06,
|
|
"loss": 2.575545310974121,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 0.9847689075630253,
|
|
"grad_norm": 11.183813240011762,
|
|
"learning_rate": 8.498607817035678e-06,
|
|
"loss": 3.2164344787597656,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.9852941176470589,
|
|
"grad_norm": 9.146282145534625,
|
|
"learning_rate": 8.496423892295066e-06,
|
|
"loss": 2.193946361541748,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 0.9858193277310925,
|
|
"grad_norm": 17.41842698593735,
|
|
"learning_rate": 8.494238661390865e-06,
|
|
"loss": 2.7824902534484863,
|
|
"step": 1877
|
|
},
|
|
{
|
|
"epoch": 0.9863445378151261,
|
|
"grad_norm": 8.544143325985905,
|
|
"learning_rate": 8.492052125139414e-06,
|
|
"loss": 2.2539215087890625,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 0.9868697478991597,
|
|
"grad_norm": 17.54889547377129,
|
|
"learning_rate": 8.48986428435754e-06,
|
|
"loss": 3.429238796234131,
|
|
"step": 1879
|
|
},
|
|
{
|
|
"epoch": 0.9873949579831933,
|
|
"grad_norm": 16.245562989276348,
|
|
"learning_rate": 8.48767513986256e-06,
|
|
"loss": 3.189175844192505,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.9879201680672269,
|
|
"grad_norm": 10.627532102304057,
|
|
"learning_rate": 8.485484692472272e-06,
|
|
"loss": 2.395397663116455,
|
|
"step": 1881
|
|
},
|
|
{
|
|
"epoch": 0.9884453781512605,
|
|
"grad_norm": 17.536174472863458,
|
|
"learning_rate": 8.483292943004965e-06,
|
|
"loss": 2.467043399810791,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 0.9889705882352942,
|
|
"grad_norm": 20.09310626654821,
|
|
"learning_rate": 8.481099892279418e-06,
|
|
"loss": 3.0610265731811523,
|
|
"step": 1883
|
|
},
|
|
{
|
|
"epoch": 0.9894957983193278,
|
|
"grad_norm": 8.928325296380594,
|
|
"learning_rate": 8.478905541114886e-06,
|
|
"loss": 3.007861852645874,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 0.9900210084033614,
|
|
"grad_norm": 10.393411495930849,
|
|
"learning_rate": 8.476709890331116e-06,
|
|
"loss": 2.348640203475952,
|
|
"step": 1885
|
|
},
|
|
{
|
|
"epoch": 0.990546218487395,
|
|
"grad_norm": 14.615631412612505,
|
|
"learning_rate": 8.474512940748345e-06,
|
|
"loss": 2.0474324226379395,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 0.9910714285714286,
|
|
"grad_norm": 12.706284887783626,
|
|
"learning_rate": 8.472314693187285e-06,
|
|
"loss": 3.3878824710845947,
|
|
"step": 1887
|
|
},
|
|
{
|
|
"epoch": 0.9915966386554622,
|
|
"grad_norm": 12.225951914697768,
|
|
"learning_rate": 8.470115148469143e-06,
|
|
"loss": 3.540670156478882,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 0.9921218487394958,
|
|
"grad_norm": 7.85398971720135,
|
|
"learning_rate": 8.467914307415601e-06,
|
|
"loss": 2.438688278198242,
|
|
"step": 1889
|
|
},
|
|
{
|
|
"epoch": 0.9926470588235294,
|
|
"grad_norm": 12.665499039931298,
|
|
"learning_rate": 8.465712170848833e-06,
|
|
"loss": 2.5243587493896484,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.993172268907563,
|
|
"grad_norm": 6.031427653850509,
|
|
"learning_rate": 8.463508739591493e-06,
|
|
"loss": 2.1889450550079346,
|
|
"step": 1891
|
|
},
|
|
{
|
|
"epoch": 0.9936974789915967,
|
|
"grad_norm": 9.432235666771582,
|
|
"learning_rate": 8.46130401446672e-06,
|
|
"loss": 2.3379998207092285,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 0.9942226890756303,
|
|
"grad_norm": 9.771243724944751,
|
|
"learning_rate": 8.459097996298137e-06,
|
|
"loss": 2.6740798950195312,
|
|
"step": 1893
|
|
},
|
|
{
|
|
"epoch": 0.9947478991596639,
|
|
"grad_norm": 8.199428256110528,
|
|
"learning_rate": 8.456890685909847e-06,
|
|
"loss": 2.6622955799102783,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 0.9952731092436975,
|
|
"grad_norm": 8.559301862167205,
|
|
"learning_rate": 8.45468208412644e-06,
|
|
"loss": 2.982351303100586,
|
|
"step": 1895
|
|
},
|
|
{
|
|
"epoch": 0.9957983193277311,
|
|
"grad_norm": 17.70681612153687,
|
|
"learning_rate": 8.452472191772983e-06,
|
|
"loss": 2.457289695739746,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 0.9963235294117647,
|
|
"grad_norm": 5.673999815291132,
|
|
"learning_rate": 8.45026100967503e-06,
|
|
"loss": 2.7809672355651855,
|
|
"step": 1897
|
|
},
|
|
{
|
|
"epoch": 0.9968487394957983,
|
|
"grad_norm": 8.730094395146992,
|
|
"learning_rate": 8.448048538658618e-06,
|
|
"loss": 2.8093996047973633,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 0.9973739495798319,
|
|
"grad_norm": 16.419111158636518,
|
|
"learning_rate": 8.445834779550257e-06,
|
|
"loss": 2.8095481395721436,
|
|
"step": 1899
|
|
},
|
|
{
|
|
"epoch": 0.9978991596638656,
|
|
"grad_norm": 12.025154461810699,
|
|
"learning_rate": 8.443619733176949e-06,
|
|
"loss": 2.9833385944366455,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.9984243697478992,
|
|
"grad_norm": 10.744489455236222,
|
|
"learning_rate": 8.441403400366169e-06,
|
|
"loss": 2.7518649101257324,
|
|
"step": 1901
|
|
},
|
|
{
|
|
"epoch": 0.9989495798319328,
|
|
"grad_norm": 12.951882859844048,
|
|
"learning_rate": 8.439185781945878e-06,
|
|
"loss": 2.776477813720703,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 0.9994747899159664,
|
|
"grad_norm": 20.019310686502983,
|
|
"learning_rate": 8.43696687874451e-06,
|
|
"loss": 3.0205705165863037,
|
|
"step": 1903
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 13.33322983646556,
|
|
"learning_rate": 8.434746691590987e-06,
|
|
"loss": 2.7565271854400635,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 1.0005252100840336,
|
|
"grad_norm": 10.425103268717317,
|
|
"learning_rate": 8.432525221314708e-06,
|
|
"loss": 1.6493275165557861,
|
|
"step": 1905
|
|
},
|
|
{
|
|
"epoch": 1.0010504201680672,
|
|
"grad_norm": 12.788701715265818,
|
|
"learning_rate": 8.430302468745546e-06,
|
|
"loss": 2.2459113597869873,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 1.0015756302521008,
|
|
"grad_norm": 11.940067647697864,
|
|
"learning_rate": 8.428078434713863e-06,
|
|
"loss": 1.8953219652175903,
|
|
"step": 1907
|
|
},
|
|
{
|
|
"epoch": 1.0021008403361344,
|
|
"grad_norm": 11.252538127650912,
|
|
"learning_rate": 8.42585312005049e-06,
|
|
"loss": 1.8556398153305054,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 1.002626050420168,
|
|
"grad_norm": 6.741969296382316,
|
|
"learning_rate": 8.423626525586744e-06,
|
|
"loss": 1.626365065574646,
|
|
"step": 1909
|
|
},
|
|
{
|
|
"epoch": 1.0031512605042017,
|
|
"grad_norm": 6.525371213681585,
|
|
"learning_rate": 8.421398652154412e-06,
|
|
"loss": 1.3644397258758545,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 1.0036764705882353,
|
|
"grad_norm": 10.261206768161827,
|
|
"learning_rate": 8.41916950058577e-06,
|
|
"loss": 1.3718510866165161,
|
|
"step": 1911
|
|
},
|
|
{
|
|
"epoch": 1.004201680672269,
|
|
"grad_norm": 13.764872593626992,
|
|
"learning_rate": 8.416939071713559e-06,
|
|
"loss": 1.724814534187317,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 1.0047268907563025,
|
|
"grad_norm": 9.236179682861412,
|
|
"learning_rate": 8.414707366371006e-06,
|
|
"loss": 1.509573221206665,
|
|
"step": 1913
|
|
},
|
|
{
|
|
"epoch": 1.0052521008403361,
|
|
"grad_norm": 13.584490473887904,
|
|
"learning_rate": 8.412474385391814e-06,
|
|
"loss": 1.0250325202941895,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 1.0057773109243697,
|
|
"grad_norm": 15.26791388338633,
|
|
"learning_rate": 8.410240129610158e-06,
|
|
"loss": 1.6212220191955566,
|
|
"step": 1915
|
|
},
|
|
{
|
|
"epoch": 1.0063025210084033,
|
|
"grad_norm": 13.036920248131361,
|
|
"learning_rate": 8.408004599860692e-06,
|
|
"loss": 1.6451389789581299,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 1.006827731092437,
|
|
"grad_norm": 11.095108355949035,
|
|
"learning_rate": 8.405767796978546e-06,
|
|
"loss": 1.5876981019973755,
|
|
"step": 1917
|
|
},
|
|
{
|
|
"epoch": 1.0073529411764706,
|
|
"grad_norm": 25.35169379351488,
|
|
"learning_rate": 8.403529721799325e-06,
|
|
"loss": 2.09970760345459,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 1.0078781512605042,
|
|
"grad_norm": 11.362918327895557,
|
|
"learning_rate": 8.40129037515911e-06,
|
|
"loss": 1.475602388381958,
|
|
"step": 1919
|
|
},
|
|
{
|
|
"epoch": 1.0084033613445378,
|
|
"grad_norm": 9.621594393477524,
|
|
"learning_rate": 8.399049757894457e-06,
|
|
"loss": 2.0639808177948,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 1.0089285714285714,
|
|
"grad_norm": 9.151211286125385,
|
|
"learning_rate": 8.396807870842396e-06,
|
|
"loss": 1.6148015260696411,
|
|
"step": 1921
|
|
},
|
|
{
|
|
"epoch": 1.009453781512605,
|
|
"grad_norm": 10.874206209075602,
|
|
"learning_rate": 8.394564714840433e-06,
|
|
"loss": 1.5400185585021973,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 1.0099789915966386,
|
|
"grad_norm": 12.736298604252928,
|
|
"learning_rate": 8.392320290726543e-06,
|
|
"loss": 1.2622665166854858,
|
|
"step": 1923
|
|
},
|
|
{
|
|
"epoch": 1.0105042016806722,
|
|
"grad_norm": 16.957767382381217,
|
|
"learning_rate": 8.390074599339182e-06,
|
|
"loss": 2.001948356628418,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 1.0110294117647058,
|
|
"grad_norm": 13.962094214283686,
|
|
"learning_rate": 8.387827641517274e-06,
|
|
"loss": 2.5753512382507324,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 1.0115546218487395,
|
|
"grad_norm": 10.222847154104663,
|
|
"learning_rate": 8.385579418100219e-06,
|
|
"loss": 1.7223831415176392,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 1.012079831932773,
|
|
"grad_norm": 11.457577280648373,
|
|
"learning_rate": 8.383329929927888e-06,
|
|
"loss": 1.5780894756317139,
|
|
"step": 1927
|
|
},
|
|
{
|
|
"epoch": 1.0126050420168067,
|
|
"grad_norm": 12.046002924461916,
|
|
"learning_rate": 8.381079177840625e-06,
|
|
"loss": 1.4928408861160278,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 1.0131302521008403,
|
|
"grad_norm": 10.956957577930083,
|
|
"learning_rate": 8.378827162679248e-06,
|
|
"loss": 1.533614158630371,
|
|
"step": 1929
|
|
},
|
|
{
|
|
"epoch": 1.013655462184874,
|
|
"grad_norm": 9.119484179757583,
|
|
"learning_rate": 8.376573885285041e-06,
|
|
"loss": 1.5786470174789429,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 1.0141806722689075,
|
|
"grad_norm": 9.904786233687716,
|
|
"learning_rate": 8.37431934649977e-06,
|
|
"loss": 2.3136680126190186,
|
|
"step": 1931
|
|
},
|
|
{
|
|
"epoch": 1.0147058823529411,
|
|
"grad_norm": 10.186617550044655,
|
|
"learning_rate": 8.37206354716566e-06,
|
|
"loss": 1.5691616535186768,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 1.0152310924369747,
|
|
"grad_norm": 8.189996147531692,
|
|
"learning_rate": 8.369806488125418e-06,
|
|
"loss": 1.6489810943603516,
|
|
"step": 1933
|
|
},
|
|
{
|
|
"epoch": 1.0157563025210083,
|
|
"grad_norm": 9.965136678330182,
|
|
"learning_rate": 8.367548170222213e-06,
|
|
"loss": 0.9397400617599487,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 1.016281512605042,
|
|
"grad_norm": 13.698451996841913,
|
|
"learning_rate": 8.365288594299688e-06,
|
|
"loss": 2.7507967948913574,
|
|
"step": 1935
|
|
},
|
|
{
|
|
"epoch": 1.0168067226890756,
|
|
"grad_norm": 14.243010083824592,
|
|
"learning_rate": 8.363027761201957e-06,
|
|
"loss": 1.343361735343933,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 1.0173319327731092,
|
|
"grad_norm": 17.856114600383805,
|
|
"learning_rate": 8.360765671773603e-06,
|
|
"loss": 1.3550803661346436,
|
|
"step": 1937
|
|
},
|
|
{
|
|
"epoch": 1.0178571428571428,
|
|
"grad_norm": 9.674800205447667,
|
|
"learning_rate": 8.358502326859674e-06,
|
|
"loss": 1.7225271463394165,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 1.0183823529411764,
|
|
"grad_norm": 11.045762867972751,
|
|
"learning_rate": 8.356237727305695e-06,
|
|
"loss": 2.1100683212280273,
|
|
"step": 1939
|
|
},
|
|
{
|
|
"epoch": 1.01890756302521,
|
|
"grad_norm": 11.14608259258499,
|
|
"learning_rate": 8.353971873957652e-06,
|
|
"loss": 1.6201719045639038,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 1.0194327731092436,
|
|
"grad_norm": 10.454592897256216,
|
|
"learning_rate": 8.351704767662005e-06,
|
|
"loss": 1.4395060539245605,
|
|
"step": 1941
|
|
},
|
|
{
|
|
"epoch": 1.0199579831932772,
|
|
"grad_norm": 8.00835615271533,
|
|
"learning_rate": 8.34943640926568e-06,
|
|
"loss": 1.489414095878601,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 1.0204831932773109,
|
|
"grad_norm": 16.38390854750751,
|
|
"learning_rate": 8.347166799616069e-06,
|
|
"loss": 1.636405110359192,
|
|
"step": 1943
|
|
},
|
|
{
|
|
"epoch": 1.0210084033613445,
|
|
"grad_norm": 11.206054027613732,
|
|
"learning_rate": 8.344895939561034e-06,
|
|
"loss": 1.1762261390686035,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 1.021533613445378,
|
|
"grad_norm": 10.214458044906083,
|
|
"learning_rate": 8.3426238299489e-06,
|
|
"loss": 1.4226603507995605,
|
|
"step": 1945
|
|
},
|
|
{
|
|
"epoch": 1.0220588235294117,
|
|
"grad_norm": 12.432188175217707,
|
|
"learning_rate": 8.340350471628469e-06,
|
|
"loss": 1.2234781980514526,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 1.0225840336134453,
|
|
"grad_norm": 16.250235673576167,
|
|
"learning_rate": 8.338075865448998e-06,
|
|
"loss": 1.6221668720245361,
|
|
"step": 1947
|
|
},
|
|
{
|
|
"epoch": 1.023109243697479,
|
|
"grad_norm": 12.816687210569661,
|
|
"learning_rate": 8.335800012260211e-06,
|
|
"loss": 1.376111388206482,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 1.0236344537815125,
|
|
"grad_norm": 10.225470219359712,
|
|
"learning_rate": 8.333522912912308e-06,
|
|
"loss": 1.4632911682128906,
|
|
"step": 1949
|
|
},
|
|
{
|
|
"epoch": 1.0241596638655461,
|
|
"grad_norm": 12.56264744409334,
|
|
"learning_rate": 8.331244568255944e-06,
|
|
"loss": 1.10945463180542,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 1.0246848739495797,
|
|
"grad_norm": 12.459697458279704,
|
|
"learning_rate": 8.328964979142244e-06,
|
|
"loss": 1.5290939807891846,
|
|
"step": 1951
|
|
},
|
|
{
|
|
"epoch": 1.0252100840336134,
|
|
"grad_norm": 11.715959432334296,
|
|
"learning_rate": 8.326684146422798e-06,
|
|
"loss": 1.2233198881149292,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 1.025735294117647,
|
|
"grad_norm": 17.91977213508758,
|
|
"learning_rate": 8.324402070949658e-06,
|
|
"loss": 2.126434326171875,
|
|
"step": 1953
|
|
},
|
|
{
|
|
"epoch": 1.0262605042016806,
|
|
"grad_norm": 13.220952963877636,
|
|
"learning_rate": 8.322118753575344e-06,
|
|
"loss": 1.3733046054840088,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 1.0267857142857142,
|
|
"grad_norm": 10.49379912086789,
|
|
"learning_rate": 8.319834195152836e-06,
|
|
"loss": 1.7667503356933594,
|
|
"step": 1955
|
|
},
|
|
{
|
|
"epoch": 1.0273109243697478,
|
|
"grad_norm": 11.748923411034493,
|
|
"learning_rate": 8.31754839653558e-06,
|
|
"loss": 1.35880708694458,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 1.0278361344537814,
|
|
"grad_norm": 8.975299039977969,
|
|
"learning_rate": 8.315261358577485e-06,
|
|
"loss": 1.5697741508483887,
|
|
"step": 1957
|
|
},
|
|
{
|
|
"epoch": 1.028361344537815,
|
|
"grad_norm": 12.443218320042634,
|
|
"learning_rate": 8.312973082132922e-06,
|
|
"loss": 1.6423313617706299,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 1.0288865546218486,
|
|
"grad_norm": 10.609728892527682,
|
|
"learning_rate": 8.310683568056725e-06,
|
|
"loss": 1.6356289386749268,
|
|
"step": 1959
|
|
},
|
|
{
|
|
"epoch": 1.0294117647058822,
|
|
"grad_norm": 11.683745668844416,
|
|
"learning_rate": 8.308392817204194e-06,
|
|
"loss": 1.2431647777557373,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 1.0299369747899159,
|
|
"grad_norm": 8.590326202928376,
|
|
"learning_rate": 8.306100830431085e-06,
|
|
"loss": 1.5427310466766357,
|
|
"step": 1961
|
|
},
|
|
{
|
|
"epoch": 1.0304621848739495,
|
|
"grad_norm": 15.523140519350019,
|
|
"learning_rate": 8.303807608593617e-06,
|
|
"loss": 1.4608049392700195,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 1.0309873949579833,
|
|
"grad_norm": 10.23039884136331,
|
|
"learning_rate": 8.301513152548474e-06,
|
|
"loss": 1.7733904123306274,
|
|
"step": 1963
|
|
},
|
|
{
|
|
"epoch": 1.0315126050420167,
|
|
"grad_norm": 11.219338154828353,
|
|
"learning_rate": 8.2992174631528e-06,
|
|
"loss": 1.797380805015564,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 1.0320378151260505,
|
|
"grad_norm": 12.126234321232678,
|
|
"learning_rate": 8.296920541264197e-06,
|
|
"loss": 1.1642725467681885,
|
|
"step": 1965
|
|
},
|
|
{
|
|
"epoch": 1.0325630252100841,
|
|
"grad_norm": 15.32514566454394,
|
|
"learning_rate": 8.294622387740728e-06,
|
|
"loss": 1.3890987634658813,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 1.0330882352941178,
|
|
"grad_norm": 12.50659440628141,
|
|
"learning_rate": 8.292323003440919e-06,
|
|
"loss": 1.7690242528915405,
|
|
"step": 1967
|
|
},
|
|
{
|
|
"epoch": 1.0336134453781514,
|
|
"grad_norm": 10.479752133577465,
|
|
"learning_rate": 8.290022389223754e-06,
|
|
"loss": 2.1003386974334717,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 1.034138655462185,
|
|
"grad_norm": 30.658755350352227,
|
|
"learning_rate": 8.287720545948676e-06,
|
|
"loss": 1.3065478801727295,
|
|
"step": 1969
|
|
},
|
|
{
|
|
"epoch": 1.0346638655462186,
|
|
"grad_norm": 8.558415688264631,
|
|
"learning_rate": 8.285417474475587e-06,
|
|
"loss": 1.1889278888702393,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 1.0351890756302522,
|
|
"grad_norm": 17.685929104524476,
|
|
"learning_rate": 8.28311317566485e-06,
|
|
"loss": 1.2149498462677002,
|
|
"step": 1971
|
|
},
|
|
{
|
|
"epoch": 1.0357142857142858,
|
|
"grad_norm": 12.117931530133028,
|
|
"learning_rate": 8.28080765037728e-06,
|
|
"loss": 1.1469614505767822,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 1.0362394957983194,
|
|
"grad_norm": 15.88119938961799,
|
|
"learning_rate": 8.278500899474162e-06,
|
|
"loss": 2.375556468963623,
|
|
"step": 1973
|
|
},
|
|
{
|
|
"epoch": 1.036764705882353,
|
|
"grad_norm": 11.486225001536914,
|
|
"learning_rate": 8.27619292381723e-06,
|
|
"loss": 1.814281940460205,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 1.0372899159663866,
|
|
"grad_norm": 10.01479759517437,
|
|
"learning_rate": 8.273883724268672e-06,
|
|
"loss": 1.0509192943572998,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 1.0378151260504203,
|
|
"grad_norm": 10.090578344197827,
|
|
"learning_rate": 8.271573301691145e-06,
|
|
"loss": 1.9715211391448975,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 1.0383403361344539,
|
|
"grad_norm": 16.65268537151143,
|
|
"learning_rate": 8.269261656947755e-06,
|
|
"loss": 2.5270819664001465,
|
|
"step": 1977
|
|
},
|
|
{
|
|
"epoch": 1.0388655462184875,
|
|
"grad_norm": 11.988105008300673,
|
|
"learning_rate": 8.266948790902064e-06,
|
|
"loss": 2.2483301162719727,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 1.039390756302521,
|
|
"grad_norm": 11.726136781315917,
|
|
"learning_rate": 8.264634704418095e-06,
|
|
"loss": 1.6729965209960938,
|
|
"step": 1979
|
|
},
|
|
{
|
|
"epoch": 1.0399159663865547,
|
|
"grad_norm": 9.88970599842327,
|
|
"learning_rate": 8.262319398360323e-06,
|
|
"loss": 2.0312275886535645,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 1.0404411764705883,
|
|
"grad_norm": 6.994579754001975,
|
|
"learning_rate": 8.260002873593679e-06,
|
|
"loss": 1.3279500007629395,
|
|
"step": 1981
|
|
},
|
|
{
|
|
"epoch": 1.040966386554622,
|
|
"grad_norm": 7.711351392387537,
|
|
"learning_rate": 8.257685130983552e-06,
|
|
"loss": 1.3498008251190186,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 1.0414915966386555,
|
|
"grad_norm": 11.63951195762544,
|
|
"learning_rate": 8.255366171395783e-06,
|
|
"loss": 1.81411612033844,
|
|
"step": 1983
|
|
},
|
|
{
|
|
"epoch": 1.0420168067226891,
|
|
"grad_norm": 6.9665214970757185,
|
|
"learning_rate": 8.253045995696669e-06,
|
|
"loss": 1.441640853881836,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 1.0425420168067228,
|
|
"grad_norm": 11.84377264488808,
|
|
"learning_rate": 8.25072460475296e-06,
|
|
"loss": 2.0769429206848145,
|
|
"step": 1985
|
|
},
|
|
{
|
|
"epoch": 1.0430672268907564,
|
|
"grad_norm": 7.8559847090079495,
|
|
"learning_rate": 8.248401999431864e-06,
|
|
"loss": 1.1951301097869873,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 1.04359243697479,
|
|
"grad_norm": 17.04903626381307,
|
|
"learning_rate": 8.246078180601035e-06,
|
|
"loss": 1.5911346673965454,
|
|
"step": 1987
|
|
},
|
|
{
|
|
"epoch": 1.0441176470588236,
|
|
"grad_norm": 9.627284924411768,
|
|
"learning_rate": 8.243753149128589e-06,
|
|
"loss": 1.5778251886367798,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 1.0446428571428572,
|
|
"grad_norm": 9.970954874040544,
|
|
"learning_rate": 8.24142690588309e-06,
|
|
"loss": 1.6873408555984497,
|
|
"step": 1989
|
|
},
|
|
{
|
|
"epoch": 1.0451680672268908,
|
|
"grad_norm": 10.305284208499565,
|
|
"learning_rate": 8.239099451733555e-06,
|
|
"loss": 1.4405624866485596,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 1.0456932773109244,
|
|
"grad_norm": 10.200434591389094,
|
|
"learning_rate": 8.236770787549456e-06,
|
|
"loss": 1.347198724746704,
|
|
"step": 1991
|
|
},
|
|
{
|
|
"epoch": 1.046218487394958,
|
|
"grad_norm": 8.571961955733093,
|
|
"learning_rate": 8.23444091420071e-06,
|
|
"loss": 0.9504708051681519,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 1.0467436974789917,
|
|
"grad_norm": 10.923911768087208,
|
|
"learning_rate": 8.232109832557696e-06,
|
|
"loss": 1.4706783294677734,
|
|
"step": 1993
|
|
},
|
|
{
|
|
"epoch": 1.0472689075630253,
|
|
"grad_norm": 9.433031076323532,
|
|
"learning_rate": 8.229777543491238e-06,
|
|
"loss": 1.5270686149597168,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 1.0477941176470589,
|
|
"grad_norm": 18.548511555963298,
|
|
"learning_rate": 8.227444047872612e-06,
|
|
"loss": 1.6383750438690186,
|
|
"step": 1995
|
|
},
|
|
{
|
|
"epoch": 1.0483193277310925,
|
|
"grad_norm": 9.778095972387245,
|
|
"learning_rate": 8.225109346573544e-06,
|
|
"loss": 1.8342719078063965,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 1.048844537815126,
|
|
"grad_norm": 17.06380107436487,
|
|
"learning_rate": 8.222773440466213e-06,
|
|
"loss": 1.628516674041748,
|
|
"step": 1997
|
|
},
|
|
{
|
|
"epoch": 1.0493697478991597,
|
|
"grad_norm": 9.864098374870848,
|
|
"learning_rate": 8.220436330423243e-06,
|
|
"loss": 0.9155079126358032,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 1.0498949579831933,
|
|
"grad_norm": 10.115094244055996,
|
|
"learning_rate": 8.218098017317715e-06,
|
|
"loss": 2.3998050689697266,
|
|
"step": 1999
|
|
},
|
|
{
|
|
"epoch": 1.050420168067227,
|
|
"grad_norm": 13.536007676797151,
|
|
"learning_rate": 8.215758502023157e-06,
|
|
"loss": 1.2778449058532715,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 1.0509453781512605,
|
|
"grad_norm": 12.233788990512272,
|
|
"learning_rate": 8.213417785413538e-06,
|
|
"loss": 1.4833780527114868,
|
|
"step": 2001
|
|
},
|
|
{
|
|
"epoch": 1.0514705882352942,
|
|
"grad_norm": 13.862242589295523,
|
|
"learning_rate": 8.21107586836329e-06,
|
|
"loss": 1.400753140449524,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 1.0519957983193278,
|
|
"grad_norm": 14.330524845983492,
|
|
"learning_rate": 8.208732751747281e-06,
|
|
"loss": 2.317253589630127,
|
|
"step": 2003
|
|
},
|
|
{
|
|
"epoch": 1.0525210084033614,
|
|
"grad_norm": 14.704479467473012,
|
|
"learning_rate": 8.206388436440833e-06,
|
|
"loss": 2.202234983444214,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 1.053046218487395,
|
|
"grad_norm": 7.315855181709781,
|
|
"learning_rate": 8.204042923319717e-06,
|
|
"loss": 1.7417163848876953,
|
|
"step": 2005
|
|
},
|
|
{
|
|
"epoch": 1.0535714285714286,
|
|
"grad_norm": 10.912332897356237,
|
|
"learning_rate": 8.201696213260149e-06,
|
|
"loss": 1.5026731491088867,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 1.0540966386554622,
|
|
"grad_norm": 55.81202535331172,
|
|
"learning_rate": 8.19934830713879e-06,
|
|
"loss": 5.182719707489014,
|
|
"step": 2007
|
|
},
|
|
{
|
|
"epoch": 1.0546218487394958,
|
|
"grad_norm": 12.774663008107998,
|
|
"learning_rate": 8.196999205832752e-06,
|
|
"loss": 1.316359519958496,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 1.0551470588235294,
|
|
"grad_norm": 15.585678671547443,
|
|
"learning_rate": 8.19464891021959e-06,
|
|
"loss": 2.2544400691986084,
|
|
"step": 2009
|
|
},
|
|
{
|
|
"epoch": 1.055672268907563,
|
|
"grad_norm": 9.725154249611599,
|
|
"learning_rate": 8.19229742117731e-06,
|
|
"loss": 1.4268792867660522,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 1.0561974789915967,
|
|
"grad_norm": 18.92807162103923,
|
|
"learning_rate": 8.189944739584361e-06,
|
|
"loss": 1.7833478450775146,
|
|
"step": 2011
|
|
},
|
|
{
|
|
"epoch": 1.0567226890756303,
|
|
"grad_norm": 10.257885875688178,
|
|
"learning_rate": 8.187590866319635e-06,
|
|
"loss": 1.4846409559249878,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 1.0572478991596639,
|
|
"grad_norm": 11.497196233579782,
|
|
"learning_rate": 8.18523580226247e-06,
|
|
"loss": 1.2766668796539307,
|
|
"step": 2013
|
|
},
|
|
{
|
|
"epoch": 1.0577731092436975,
|
|
"grad_norm": 10.755765096146325,
|
|
"learning_rate": 8.182879548292655e-06,
|
|
"loss": 1.36566162109375,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 1.058298319327731,
|
|
"grad_norm": 8.729101870569439,
|
|
"learning_rate": 8.180522105290414e-06,
|
|
"loss": 1.9430431127548218,
|
|
"step": 2015
|
|
},
|
|
{
|
|
"epoch": 1.0588235294117647,
|
|
"grad_norm": 12.708760338325707,
|
|
"learning_rate": 8.178163474136424e-06,
|
|
"loss": 1.4622400999069214,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 1.0593487394957983,
|
|
"grad_norm": 7.533207633814089,
|
|
"learning_rate": 8.1758036557118e-06,
|
|
"loss": 1.449235200881958,
|
|
"step": 2017
|
|
},
|
|
{
|
|
"epoch": 1.059873949579832,
|
|
"grad_norm": 11.715678520399887,
|
|
"learning_rate": 8.173442650898103e-06,
|
|
"loss": 1.6451451778411865,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 1.0603991596638656,
|
|
"grad_norm": 15.151606191516144,
|
|
"learning_rate": 8.171080460577337e-06,
|
|
"loss": 1.9938178062438965,
|
|
"step": 2019
|
|
},
|
|
{
|
|
"epoch": 1.0609243697478992,
|
|
"grad_norm": 35.448718866794074,
|
|
"learning_rate": 8.168717085631946e-06,
|
|
"loss": 1.7531225681304932,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 1.0614495798319328,
|
|
"grad_norm": 55.681063416463836,
|
|
"learning_rate": 8.166352526944821e-06,
|
|
"loss": 1.7547500133514404,
|
|
"step": 2021
|
|
},
|
|
{
|
|
"epoch": 1.0619747899159664,
|
|
"grad_norm": 7.3171440156645575,
|
|
"learning_rate": 8.163986785399295e-06,
|
|
"loss": 1.7646377086639404,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 1.0625,
|
|
"grad_norm": 15.786531214305574,
|
|
"learning_rate": 8.161619861879136e-06,
|
|
"loss": 2.4931797981262207,
|
|
"step": 2023
|
|
},
|
|
{
|
|
"epoch": 1.0630252100840336,
|
|
"grad_norm": 22.610717897909964,
|
|
"learning_rate": 8.159251757268566e-06,
|
|
"loss": 1.6910812854766846,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 1.0635504201680672,
|
|
"grad_norm": 40.578679605306704,
|
|
"learning_rate": 8.156882472452232e-06,
|
|
"loss": 1.2922301292419434,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 1.0640756302521008,
|
|
"grad_norm": 15.988684800481444,
|
|
"learning_rate": 8.154512008315239e-06,
|
|
"loss": 1.3810336589813232,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 1.0646008403361344,
|
|
"grad_norm": 10.736692668762364,
|
|
"learning_rate": 8.15214036574312e-06,
|
|
"loss": 1.039983868598938,
|
|
"step": 2027
|
|
},
|
|
{
|
|
"epoch": 1.065126050420168,
|
|
"grad_norm": 12.908950532378157,
|
|
"learning_rate": 8.149767545621852e-06,
|
|
"loss": 1.4156992435455322,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 1.0656512605042017,
|
|
"grad_norm": 7.119310520679408,
|
|
"learning_rate": 8.147393548837856e-06,
|
|
"loss": 1.9925599098205566,
|
|
"step": 2029
|
|
},
|
|
{
|
|
"epoch": 1.0661764705882353,
|
|
"grad_norm": 8.347532632013307,
|
|
"learning_rate": 8.145018376277987e-06,
|
|
"loss": 1.4140294790267944,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 1.066701680672269,
|
|
"grad_norm": 17.027405986487327,
|
|
"learning_rate": 8.14264202882954e-06,
|
|
"loss": 1.6106376647949219,
|
|
"step": 2031
|
|
},
|
|
{
|
|
"epoch": 1.0672268907563025,
|
|
"grad_norm": 19.674325053254293,
|
|
"learning_rate": 8.14026450738025e-06,
|
|
"loss": 1.544736623764038,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 1.0677521008403361,
|
|
"grad_norm": 12.161085828150632,
|
|
"learning_rate": 8.137885812818296e-06,
|
|
"loss": 1.5386223793029785,
|
|
"step": 2033
|
|
},
|
|
{
|
|
"epoch": 1.0682773109243697,
|
|
"grad_norm": 12.707797474192757,
|
|
"learning_rate": 8.135505946032285e-06,
|
|
"loss": 1.6691786050796509,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 1.0688025210084033,
|
|
"grad_norm": 12.529033411521157,
|
|
"learning_rate": 8.133124907911268e-06,
|
|
"loss": 1.5454782247543335,
|
|
"step": 2035
|
|
},
|
|
{
|
|
"epoch": 1.069327731092437,
|
|
"grad_norm": 13.596070634937583,
|
|
"learning_rate": 8.130742699344731e-06,
|
|
"loss": 1.7887153625488281,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 1.0698529411764706,
|
|
"grad_norm": 18.118361638657046,
|
|
"learning_rate": 8.128359321222601e-06,
|
|
"loss": 1.795508623123169,
|
|
"step": 2037
|
|
},
|
|
{
|
|
"epoch": 1.0703781512605042,
|
|
"grad_norm": 9.370203496553357,
|
|
"learning_rate": 8.12597477443524e-06,
|
|
"loss": 1.3259203433990479,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 1.0709033613445378,
|
|
"grad_norm": 11.834875007050861,
|
|
"learning_rate": 8.123589059873445e-06,
|
|
"loss": 1.6646111011505127,
|
|
"step": 2039
|
|
},
|
|
{
|
|
"epoch": 1.0714285714285714,
|
|
"grad_norm": 10.787634958856607,
|
|
"learning_rate": 8.121202178428449e-06,
|
|
"loss": 2.476722240447998,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 1.071953781512605,
|
|
"grad_norm": 9.031927869789378,
|
|
"learning_rate": 8.118814130991925e-06,
|
|
"loss": 1.5974336862564087,
|
|
"step": 2041
|
|
},
|
|
{
|
|
"epoch": 1.0724789915966386,
|
|
"grad_norm": 15.376387669285181,
|
|
"learning_rate": 8.116424918455978e-06,
|
|
"loss": 1.0614888668060303,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 1.0730042016806722,
|
|
"grad_norm": 11.152342039453462,
|
|
"learning_rate": 8.114034541713152e-06,
|
|
"loss": 1.2790303230285645,
|
|
"step": 2043
|
|
},
|
|
{
|
|
"epoch": 1.0735294117647058,
|
|
"grad_norm": 14.691697775097392,
|
|
"learning_rate": 8.111643001656417e-06,
|
|
"loss": 1.645697832107544,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 1.0740546218487395,
|
|
"grad_norm": 9.121862327883473,
|
|
"learning_rate": 8.109250299179188e-06,
|
|
"loss": 1.6840964555740356,
|
|
"step": 2045
|
|
},
|
|
{
|
|
"epoch": 1.074579831932773,
|
|
"grad_norm": 10.47633908025849,
|
|
"learning_rate": 8.10685643517531e-06,
|
|
"loss": 1.9050471782684326,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 1.0751050420168067,
|
|
"grad_norm": 9.80912174008259,
|
|
"learning_rate": 8.10446141053906e-06,
|
|
"loss": 2.5355887413024902,
|
|
"step": 2047
|
|
},
|
|
{
|
|
"epoch": 1.0756302521008403,
|
|
"grad_norm": 13.492053149298671,
|
|
"learning_rate": 8.10206522616515e-06,
|
|
"loss": 1.5174318552017212,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 1.076155462184874,
|
|
"grad_norm": 9.565188460047532,
|
|
"learning_rate": 8.09966788294873e-06,
|
|
"loss": 1.2498542070388794,
|
|
"step": 2049
|
|
},
|
|
{
|
|
"epoch": 1.0766806722689075,
|
|
"grad_norm": 15.641539869504182,
|
|
"learning_rate": 8.097269381785373e-06,
|
|
"loss": 2.404829978942871,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 1.0772058823529411,
|
|
"grad_norm": 7.470558374854489,
|
|
"learning_rate": 8.094869723571093e-06,
|
|
"loss": 1.439483880996704,
|
|
"step": 2051
|
|
},
|
|
{
|
|
"epoch": 1.0777310924369747,
|
|
"grad_norm": 9.086649858544119,
|
|
"learning_rate": 8.092468909202335e-06,
|
|
"loss": 1.3623019456863403,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 1.0782563025210083,
|
|
"grad_norm": 12.676821930614663,
|
|
"learning_rate": 8.090066939575972e-06,
|
|
"loss": 1.33333158493042,
|
|
"step": 2053
|
|
},
|
|
{
|
|
"epoch": 1.078781512605042,
|
|
"grad_norm": 12.452525963389826,
|
|
"learning_rate": 8.08766381558931e-06,
|
|
"loss": 1.772153377532959,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 1.0793067226890756,
|
|
"grad_norm": 9.747770629837545,
|
|
"learning_rate": 8.08525953814009e-06,
|
|
"loss": 1.0967566967010498,
|
|
"step": 2055
|
|
},
|
|
{
|
|
"epoch": 1.0798319327731092,
|
|
"grad_norm": 7.561380657211976,
|
|
"learning_rate": 8.08285410812648e-06,
|
|
"loss": 1.398902177810669,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 1.0803571428571428,
|
|
"grad_norm": 12.324074414733897,
|
|
"learning_rate": 8.080447526447079e-06,
|
|
"loss": 1.1446821689605713,
|
|
"step": 2057
|
|
},
|
|
{
|
|
"epoch": 1.0808823529411764,
|
|
"grad_norm": 11.217338342744727,
|
|
"learning_rate": 8.078039794000915e-06,
|
|
"loss": 1.4770509004592896,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 1.08140756302521,
|
|
"grad_norm": 13.735683293515555,
|
|
"learning_rate": 8.075630911687451e-06,
|
|
"loss": 2.7962405681610107,
|
|
"step": 2059
|
|
},
|
|
{
|
|
"epoch": 1.0819327731092436,
|
|
"grad_norm": 12.48060899717405,
|
|
"learning_rate": 8.073220880406576e-06,
|
|
"loss": 1.7615755796432495,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 1.0824579831932772,
|
|
"grad_norm": 11.783077342575183,
|
|
"learning_rate": 8.070809701058606e-06,
|
|
"loss": 0.9123398661613464,
|
|
"step": 2061
|
|
},
|
|
{
|
|
"epoch": 1.0829831932773109,
|
|
"grad_norm": 11.414839447073165,
|
|
"learning_rate": 8.068397374544292e-06,
|
|
"loss": 2.6187493801116943,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 1.0835084033613445,
|
|
"grad_norm": 8.039078127914273,
|
|
"learning_rate": 8.065983901764807e-06,
|
|
"loss": 1.6874656677246094,
|
|
"step": 2063
|
|
},
|
|
{
|
|
"epoch": 1.084033613445378,
|
|
"grad_norm": 9.701868706948737,
|
|
"learning_rate": 8.063569283621754e-06,
|
|
"loss": 2.1172051429748535,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 1.0845588235294117,
|
|
"grad_norm": 11.175470061764607,
|
|
"learning_rate": 8.061153521017169e-06,
|
|
"loss": 1.5343923568725586,
|
|
"step": 2065
|
|
},
|
|
{
|
|
"epoch": 1.0850840336134453,
|
|
"grad_norm": 13.997563887917403,
|
|
"learning_rate": 8.05873661485351e-06,
|
|
"loss": 1.7000303268432617,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 1.085609243697479,
|
|
"grad_norm": 28.031097332273816,
|
|
"learning_rate": 8.056318566033664e-06,
|
|
"loss": 1.5794883966445923,
|
|
"step": 2067
|
|
},
|
|
{
|
|
"epoch": 1.0861344537815125,
|
|
"grad_norm": 12.183818861348108,
|
|
"learning_rate": 8.05389937546094e-06,
|
|
"loss": 1.4774911403656006,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 1.0866596638655461,
|
|
"grad_norm": 10.483688768681391,
|
|
"learning_rate": 8.051479044039086e-06,
|
|
"loss": 1.605708122253418,
|
|
"step": 2069
|
|
},
|
|
{
|
|
"epoch": 1.0871848739495797,
|
|
"grad_norm": 27.820678027151825,
|
|
"learning_rate": 8.049057572672263e-06,
|
|
"loss": 1.5847560167312622,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 1.0877100840336134,
|
|
"grad_norm": 12.4251757392418,
|
|
"learning_rate": 8.046634962265064e-06,
|
|
"loss": 1.5098514556884766,
|
|
"step": 2071
|
|
},
|
|
{
|
|
"epoch": 1.088235294117647,
|
|
"grad_norm": 36.490382393341974,
|
|
"learning_rate": 8.044211213722508e-06,
|
|
"loss": 2.929486036300659,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 1.0887605042016806,
|
|
"grad_norm": 20.939520114666106,
|
|
"learning_rate": 8.041786327950037e-06,
|
|
"loss": 1.748311161994934,
|
|
"step": 2073
|
|
},
|
|
{
|
|
"epoch": 1.0892857142857142,
|
|
"grad_norm": 15.906718423276466,
|
|
"learning_rate": 8.039360305853518e-06,
|
|
"loss": 1.479958415031433,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 1.0898109243697478,
|
|
"grad_norm": 11.035678490765639,
|
|
"learning_rate": 8.036933148339246e-06,
|
|
"loss": 1.3642189502716064,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 1.0903361344537814,
|
|
"grad_norm": 12.328561544618518,
|
|
"learning_rate": 8.034504856313935e-06,
|
|
"loss": 1.1948144435882568,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 1.090861344537815,
|
|
"grad_norm": 9.035793402326892,
|
|
"learning_rate": 8.032075430684724e-06,
|
|
"loss": 1.5175752639770508,
|
|
"step": 2077
|
|
},
|
|
{
|
|
"epoch": 1.0913865546218486,
|
|
"grad_norm": 11.951260351842103,
|
|
"learning_rate": 8.029644872359182e-06,
|
|
"loss": 1.1992278099060059,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 1.0919117647058822,
|
|
"grad_norm": 12.288571411765174,
|
|
"learning_rate": 8.027213182245289e-06,
|
|
"loss": 1.6592459678649902,
|
|
"step": 2079
|
|
},
|
|
{
|
|
"epoch": 1.092436974789916,
|
|
"grad_norm": 8.202280335864222,
|
|
"learning_rate": 8.024780361251458e-06,
|
|
"loss": 1.628820776939392,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 1.0929621848739495,
|
|
"grad_norm": 10.919147602699114,
|
|
"learning_rate": 8.02234641028652e-06,
|
|
"loss": 1.2624070644378662,
|
|
"step": 2081
|
|
},
|
|
{
|
|
"epoch": 1.0934873949579833,
|
|
"grad_norm": 22.946780337331695,
|
|
"learning_rate": 8.019911330259733e-06,
|
|
"loss": 2.224916458129883,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 1.0940126050420167,
|
|
"grad_norm": 9.588830757244613,
|
|
"learning_rate": 8.017475122080767e-06,
|
|
"loss": 1.2903892993927002,
|
|
"step": 2083
|
|
},
|
|
{
|
|
"epoch": 1.0945378151260505,
|
|
"grad_norm": 12.260829010012472,
|
|
"learning_rate": 8.015037786659725e-06,
|
|
"loss": 2.123779058456421,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 1.095063025210084,
|
|
"grad_norm": 13.832226936795907,
|
|
"learning_rate": 8.012599324907121e-06,
|
|
"loss": 1.6024169921875,
|
|
"step": 2085
|
|
},
|
|
{
|
|
"epoch": 1.0955882352941178,
|
|
"grad_norm": 7.579076367169775,
|
|
"learning_rate": 8.010159737733897e-06,
|
|
"loss": 1.3724197149276733,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 1.0961134453781514,
|
|
"grad_norm": 7.8693581818591625,
|
|
"learning_rate": 8.007719026051413e-06,
|
|
"loss": 1.2978019714355469,
|
|
"step": 2087
|
|
},
|
|
{
|
|
"epoch": 1.096638655462185,
|
|
"grad_norm": 10.111588038676686,
|
|
"learning_rate": 8.005277190771447e-06,
|
|
"loss": 0.7179710865020752,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 1.0971638655462186,
|
|
"grad_norm": 15.55957612031918,
|
|
"learning_rate": 8.0028342328062e-06,
|
|
"loss": 1.2068419456481934,
|
|
"step": 2089
|
|
},
|
|
{
|
|
"epoch": 1.0976890756302522,
|
|
"grad_norm": 9.113015245034111,
|
|
"learning_rate": 8.00039015306829e-06,
|
|
"loss": 1.2838717699050903,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 1.0982142857142858,
|
|
"grad_norm": 12.952906569925261,
|
|
"learning_rate": 7.997944952470755e-06,
|
|
"loss": 2.1330454349517822,
|
|
"step": 2091
|
|
},
|
|
{
|
|
"epoch": 1.0987394957983194,
|
|
"grad_norm": 13.445457154747649,
|
|
"learning_rate": 7.995498631927053e-06,
|
|
"loss": 2.0122201442718506,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 1.099264705882353,
|
|
"grad_norm": 11.051591329612487,
|
|
"learning_rate": 7.993051192351056e-06,
|
|
"loss": 2.080228090286255,
|
|
"step": 2093
|
|
},
|
|
{
|
|
"epoch": 1.0997899159663866,
|
|
"grad_norm": 10.63558033130149,
|
|
"learning_rate": 7.990602634657062e-06,
|
|
"loss": 1.3373291492462158,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 1.1003151260504203,
|
|
"grad_norm": 16.169468011674773,
|
|
"learning_rate": 7.988152959759778e-06,
|
|
"loss": 1.3824642896652222,
|
|
"step": 2095
|
|
},
|
|
{
|
|
"epoch": 1.1008403361344539,
|
|
"grad_norm": 11.460722090488222,
|
|
"learning_rate": 7.985702168574335e-06,
|
|
"loss": 1.7380826473236084,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 1.1013655462184875,
|
|
"grad_norm": 12.843943771247455,
|
|
"learning_rate": 7.983250262016276e-06,
|
|
"loss": 1.554076910018921,
|
|
"step": 2097
|
|
},
|
|
{
|
|
"epoch": 1.101890756302521,
|
|
"grad_norm": 11.044942328970958,
|
|
"learning_rate": 7.980797241001563e-06,
|
|
"loss": 1.7797731161117554,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 1.1024159663865547,
|
|
"grad_norm": 11.24677358803186,
|
|
"learning_rate": 7.978343106446575e-06,
|
|
"loss": 2.0195977687835693,
|
|
"step": 2099
|
|
},
|
|
{
|
|
"epoch": 1.1029411764705883,
|
|
"grad_norm": 8.62059118420625,
|
|
"learning_rate": 7.975887859268105e-06,
|
|
"loss": 1.4654607772827148,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 1.103466386554622,
|
|
"grad_norm": 8.437366913653893,
|
|
"learning_rate": 7.973431500383366e-06,
|
|
"loss": 2.535557508468628,
|
|
"step": 2101
|
|
},
|
|
{
|
|
"epoch": 1.1039915966386555,
|
|
"grad_norm": 11.01283641772209,
|
|
"learning_rate": 7.970974030709982e-06,
|
|
"loss": 1.2387416362762451,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 1.1045168067226891,
|
|
"grad_norm": 10.255202846207247,
|
|
"learning_rate": 7.96851545116599e-06,
|
|
"loss": 1.650892734527588,
|
|
"step": 2103
|
|
},
|
|
{
|
|
"epoch": 1.1050420168067228,
|
|
"grad_norm": 13.853695395247032,
|
|
"learning_rate": 7.966055762669846e-06,
|
|
"loss": 2.867412567138672,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 1.1055672268907564,
|
|
"grad_norm": 15.835028427878946,
|
|
"learning_rate": 7.963594966140423e-06,
|
|
"loss": 1.7530300617218018,
|
|
"step": 2105
|
|
},
|
|
{
|
|
"epoch": 1.10609243697479,
|
|
"grad_norm": 8.282839586685641,
|
|
"learning_rate": 7.961133062496999e-06,
|
|
"loss": 2.3559670448303223,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 1.1066176470588236,
|
|
"grad_norm": 9.570546417949428,
|
|
"learning_rate": 7.958670052659274e-06,
|
|
"loss": 1.6627740859985352,
|
|
"step": 2107
|
|
},
|
|
{
|
|
"epoch": 1.1071428571428572,
|
|
"grad_norm": 7.740856763553868,
|
|
"learning_rate": 7.956205937547354e-06,
|
|
"loss": 1.9173784255981445,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 1.1076680672268908,
|
|
"grad_norm": 9.773895398357354,
|
|
"learning_rate": 7.953740718081765e-06,
|
|
"loss": 2.2621545791625977,
|
|
"step": 2109
|
|
},
|
|
{
|
|
"epoch": 1.1081932773109244,
|
|
"grad_norm": 9.583472556089905,
|
|
"learning_rate": 7.951274395183442e-06,
|
|
"loss": 1.1864567995071411,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 1.108718487394958,
|
|
"grad_norm": 15.68589090768525,
|
|
"learning_rate": 7.948806969773731e-06,
|
|
"loss": 1.8923931121826172,
|
|
"step": 2111
|
|
},
|
|
{
|
|
"epoch": 1.1092436974789917,
|
|
"grad_norm": 12.954378909740273,
|
|
"learning_rate": 7.94633844277439e-06,
|
|
"loss": 1.652284860610962,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 1.1097689075630253,
|
|
"grad_norm": 7.644328375634613,
|
|
"learning_rate": 7.943868815107594e-06,
|
|
"loss": 1.6230323314666748,
|
|
"step": 2113
|
|
},
|
|
{
|
|
"epoch": 1.1102941176470589,
|
|
"grad_norm": 11.226280315272911,
|
|
"learning_rate": 7.941398087695923e-06,
|
|
"loss": 2.466500997543335,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 1.1108193277310925,
|
|
"grad_norm": 7.585851375413204,
|
|
"learning_rate": 7.938926261462366e-06,
|
|
"loss": 1.4757699966430664,
|
|
"step": 2115
|
|
},
|
|
{
|
|
"epoch": 1.111344537815126,
|
|
"grad_norm": 7.776935588322972,
|
|
"learning_rate": 7.936453337330332e-06,
|
|
"loss": 1.6728451251983643,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 1.1118697478991597,
|
|
"grad_norm": 21.108057869364007,
|
|
"learning_rate": 7.933979316223632e-06,
|
|
"loss": 0.9731060266494751,
|
|
"step": 2117
|
|
},
|
|
{
|
|
"epoch": 1.1123949579831933,
|
|
"grad_norm": 9.702414396064892,
|
|
"learning_rate": 7.931504199066491e-06,
|
|
"loss": 1.139401912689209,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 1.112920168067227,
|
|
"grad_norm": 10.092397912276617,
|
|
"learning_rate": 7.929027986783538e-06,
|
|
"loss": 1.5177682638168335,
|
|
"step": 2119
|
|
},
|
|
{
|
|
"epoch": 1.1134453781512605,
|
|
"grad_norm": 9.11765591697188,
|
|
"learning_rate": 7.926550680299819e-06,
|
|
"loss": 1.5064119100570679,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 1.1139705882352942,
|
|
"grad_norm": 13.95785702803511,
|
|
"learning_rate": 7.92407228054078e-06,
|
|
"loss": 1.2225053310394287,
|
|
"step": 2121
|
|
},
|
|
{
|
|
"epoch": 1.1144957983193278,
|
|
"grad_norm": 11.100026573328575,
|
|
"learning_rate": 7.921592788432286e-06,
|
|
"loss": 1.8686716556549072,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 1.1150210084033614,
|
|
"grad_norm": 10.567833444457785,
|
|
"learning_rate": 7.919112204900597e-06,
|
|
"loss": 1.3062869310379028,
|
|
"step": 2123
|
|
},
|
|
{
|
|
"epoch": 1.115546218487395,
|
|
"grad_norm": 16.474738009080117,
|
|
"learning_rate": 7.916630530872394e-06,
|
|
"loss": 1.540877342224121,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 1.1160714285714286,
|
|
"grad_norm": 14.35074258298947,
|
|
"learning_rate": 7.914147767274756e-06,
|
|
"loss": 1.9422478675842285,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 1.1165966386554622,
|
|
"grad_norm": 14.403776564618257,
|
|
"learning_rate": 7.911663915035173e-06,
|
|
"loss": 1.6253186464309692,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 1.1171218487394958,
|
|
"grad_norm": 7.615524001765494,
|
|
"learning_rate": 7.90917897508154e-06,
|
|
"loss": 1.4136857986450195,
|
|
"step": 2127
|
|
},
|
|
{
|
|
"epoch": 1.1176470588235294,
|
|
"grad_norm": 7.8717356840017265,
|
|
"learning_rate": 7.90669294834216e-06,
|
|
"loss": 1.2877161502838135,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 1.118172268907563,
|
|
"grad_norm": 9.996219379659037,
|
|
"learning_rate": 7.904205835745744e-06,
|
|
"loss": 1.1908361911773682,
|
|
"step": 2129
|
|
},
|
|
{
|
|
"epoch": 1.1186974789915967,
|
|
"grad_norm": 10.477496498628517,
|
|
"learning_rate": 7.9017176382214e-06,
|
|
"loss": 1.6736955642700195,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 1.1192226890756303,
|
|
"grad_norm": 11.47395084321555,
|
|
"learning_rate": 7.899228356698651e-06,
|
|
"loss": 1.5920789241790771,
|
|
"step": 2131
|
|
},
|
|
{
|
|
"epoch": 1.1197478991596639,
|
|
"grad_norm": 13.865603087171925,
|
|
"learning_rate": 7.896737992107419e-06,
|
|
"loss": 3.513488292694092,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 1.1202731092436975,
|
|
"grad_norm": 11.555605178973293,
|
|
"learning_rate": 7.894246545378037e-06,
|
|
"loss": 1.7649219036102295,
|
|
"step": 2133
|
|
},
|
|
{
|
|
"epoch": 1.120798319327731,
|
|
"grad_norm": 10.22602902533883,
|
|
"learning_rate": 7.891754017441234e-06,
|
|
"loss": 1.7826954126358032,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 1.1213235294117647,
|
|
"grad_norm": 9.059720444354182,
|
|
"learning_rate": 7.889260409228146e-06,
|
|
"loss": 1.7290338277816772,
|
|
"step": 2135
|
|
},
|
|
{
|
|
"epoch": 1.1218487394957983,
|
|
"grad_norm": 15.914026506393606,
|
|
"learning_rate": 7.886765721670316e-06,
|
|
"loss": 1.7057852745056152,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 1.122373949579832,
|
|
"grad_norm": 11.616104242752517,
|
|
"learning_rate": 7.884269955699689e-06,
|
|
"loss": 1.070406198501587,
|
|
"step": 2137
|
|
},
|
|
{
|
|
"epoch": 1.1228991596638656,
|
|
"grad_norm": 16.155071107074644,
|
|
"learning_rate": 7.881773112248607e-06,
|
|
"loss": 1.948705792427063,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 1.1234243697478992,
|
|
"grad_norm": 12.81143105369926,
|
|
"learning_rate": 7.879275192249822e-06,
|
|
"loss": 1.8320515155792236,
|
|
"step": 2139
|
|
},
|
|
{
|
|
"epoch": 1.1239495798319328,
|
|
"grad_norm": 13.145916077419596,
|
|
"learning_rate": 7.876776196636486e-06,
|
|
"loss": 1.7804217338562012,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 1.1244747899159664,
|
|
"grad_norm": 8.863979098179895,
|
|
"learning_rate": 7.874276126342151e-06,
|
|
"loss": 1.4772311449050903,
|
|
"step": 2141
|
|
},
|
|
{
|
|
"epoch": 1.125,
|
|
"grad_norm": 9.3697796448481,
|
|
"learning_rate": 7.87177498230077e-06,
|
|
"loss": 1.59299635887146,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 1.1255252100840336,
|
|
"grad_norm": 10.06456063773516,
|
|
"learning_rate": 7.869272765446701e-06,
|
|
"loss": 1.3145837783813477,
|
|
"step": 2143
|
|
},
|
|
{
|
|
"epoch": 1.1260504201680672,
|
|
"grad_norm": 15.160868267450002,
|
|
"learning_rate": 7.866769476714697e-06,
|
|
"loss": 1.5451463460922241,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 1.1265756302521008,
|
|
"grad_norm": 10.424644826790757,
|
|
"learning_rate": 7.86426511703992e-06,
|
|
"loss": 1.3202342987060547,
|
|
"step": 2145
|
|
},
|
|
{
|
|
"epoch": 1.1271008403361344,
|
|
"grad_norm": 12.435631858650448,
|
|
"learning_rate": 7.861759687357922e-06,
|
|
"loss": 1.6770918369293213,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 1.127626050420168,
|
|
"grad_norm": 9.180228584474863,
|
|
"learning_rate": 7.859253188604662e-06,
|
|
"loss": 1.8522690534591675,
|
|
"step": 2147
|
|
},
|
|
{
|
|
"epoch": 1.1281512605042017,
|
|
"grad_norm": 14.472194230527451,
|
|
"learning_rate": 7.856745621716495e-06,
|
|
"loss": 1.361525297164917,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 1.1286764705882353,
|
|
"grad_norm": 16.700491541017072,
|
|
"learning_rate": 7.854236987630178e-06,
|
|
"loss": 1.7216304540634155,
|
|
"step": 2149
|
|
},
|
|
{
|
|
"epoch": 1.129201680672269,
|
|
"grad_norm": 40.94757163724544,
|
|
"learning_rate": 7.851727287282863e-06,
|
|
"loss": 2.517025947570801,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 1.1297268907563025,
|
|
"grad_norm": 13.502250829995372,
|
|
"learning_rate": 7.8492165216121e-06,
|
|
"loss": 1.6959048509597778,
|
|
"step": 2151
|
|
},
|
|
{
|
|
"epoch": 1.1302521008403361,
|
|
"grad_norm": 8.181785474013854,
|
|
"learning_rate": 7.846704691555843e-06,
|
|
"loss": 1.4638583660125732,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 1.1307773109243697,
|
|
"grad_norm": 13.888894644745077,
|
|
"learning_rate": 7.844191798052438e-06,
|
|
"loss": 1.392757534980774,
|
|
"step": 2153
|
|
},
|
|
{
|
|
"epoch": 1.1313025210084033,
|
|
"grad_norm": 10.720030748817441,
|
|
"learning_rate": 7.841677842040628e-06,
|
|
"loss": 1.1821082830429077,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 1.131827731092437,
|
|
"grad_norm": 15.365251726266722,
|
|
"learning_rate": 7.839162824459559e-06,
|
|
"loss": 1.3670696020126343,
|
|
"step": 2155
|
|
},
|
|
{
|
|
"epoch": 1.1323529411764706,
|
|
"grad_norm": 19.855347202280466,
|
|
"learning_rate": 7.836646746248764e-06,
|
|
"loss": 2.9130239486694336,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 1.1328781512605042,
|
|
"grad_norm": 14.311094415567702,
|
|
"learning_rate": 7.834129608348183e-06,
|
|
"loss": 2.0681352615356445,
|
|
"step": 2157
|
|
},
|
|
{
|
|
"epoch": 1.1334033613445378,
|
|
"grad_norm": 11.205340555282687,
|
|
"learning_rate": 7.831611411698141e-06,
|
|
"loss": 2.2508697509765625,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 1.1339285714285714,
|
|
"grad_norm": 7.939795916520177,
|
|
"learning_rate": 7.829092157239369e-06,
|
|
"loss": 1.585735559463501,
|
|
"step": 2159
|
|
},
|
|
{
|
|
"epoch": 1.134453781512605,
|
|
"grad_norm": 22.578910723135277,
|
|
"learning_rate": 7.826571845912985e-06,
|
|
"loss": 1.9568084478378296,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 1.1349789915966386,
|
|
"grad_norm": 12.494036014927381,
|
|
"learning_rate": 7.824050478660506e-06,
|
|
"loss": 2.1708197593688965,
|
|
"step": 2161
|
|
},
|
|
{
|
|
"epoch": 1.1355042016806722,
|
|
"grad_norm": 9.881004788162711,
|
|
"learning_rate": 7.821528056423842e-06,
|
|
"loss": 1.2551255226135254,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 1.1360294117647058,
|
|
"grad_norm": 10.450550682617532,
|
|
"learning_rate": 7.819004580145298e-06,
|
|
"loss": 1.5346543788909912,
|
|
"step": 2163
|
|
},
|
|
{
|
|
"epoch": 1.1365546218487395,
|
|
"grad_norm": 15.25330423969048,
|
|
"learning_rate": 7.816480050767573e-06,
|
|
"loss": 1.47426438331604,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 1.137079831932773,
|
|
"grad_norm": 10.78464785219572,
|
|
"learning_rate": 7.813954469233758e-06,
|
|
"loss": 1.9535887241363525,
|
|
"step": 2165
|
|
},
|
|
{
|
|
"epoch": 1.1376050420168067,
|
|
"grad_norm": 8.468384442509409,
|
|
"learning_rate": 7.811427836487336e-06,
|
|
"loss": 1.2346587181091309,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 1.1381302521008403,
|
|
"grad_norm": 13.7764814417855,
|
|
"learning_rate": 7.808900153472188e-06,
|
|
"loss": 0.9352411031723022,
|
|
"step": 2167
|
|
},
|
|
{
|
|
"epoch": 1.138655462184874,
|
|
"grad_norm": 9.760876736141405,
|
|
"learning_rate": 7.80637142113258e-06,
|
|
"loss": 1.6460061073303223,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 1.1391806722689075,
|
|
"grad_norm": 13.153259347584816,
|
|
"learning_rate": 7.803841640413177e-06,
|
|
"loss": 0.9652544856071472,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 1.1397058823529411,
|
|
"grad_norm": 14.051395771547853,
|
|
"learning_rate": 7.801310812259031e-06,
|
|
"loss": 1.7024542093276978,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 1.1402310924369747,
|
|
"grad_norm": 12.165075438526586,
|
|
"learning_rate": 7.798778937615586e-06,
|
|
"loss": 1.4805266857147217,
|
|
"step": 2171
|
|
},
|
|
{
|
|
"epoch": 1.1407563025210083,
|
|
"grad_norm": 9.555541474279284,
|
|
"learning_rate": 7.79624601742868e-06,
|
|
"loss": 1.645806074142456,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 1.141281512605042,
|
|
"grad_norm": 16.181434082927396,
|
|
"learning_rate": 7.793712052644535e-06,
|
|
"loss": 1.834975004196167,
|
|
"step": 2173
|
|
},
|
|
{
|
|
"epoch": 1.1418067226890756,
|
|
"grad_norm": 13.115189814615517,
|
|
"learning_rate": 7.791177044209773e-06,
|
|
"loss": 1.0914037227630615,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 1.1423319327731092,
|
|
"grad_norm": 8.68477919041166,
|
|
"learning_rate": 7.788640993071397e-06,
|
|
"loss": 1.2928192615509033,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 1.1428571428571428,
|
|
"grad_norm": 9.277072928900862,
|
|
"learning_rate": 7.786103900176804e-06,
|
|
"loss": 1.5239992141723633,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 1.1433823529411764,
|
|
"grad_norm": 7.165041431498236,
|
|
"learning_rate": 7.783565766473777e-06,
|
|
"loss": 1.7771321535110474,
|
|
"step": 2177
|
|
},
|
|
{
|
|
"epoch": 1.14390756302521,
|
|
"grad_norm": 10.426663279881508,
|
|
"learning_rate": 7.781026592910493e-06,
|
|
"loss": 2.5389771461486816,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 1.1444327731092436,
|
|
"grad_norm": 11.924272441885787,
|
|
"learning_rate": 7.778486380435512e-06,
|
|
"loss": 1.434378743171692,
|
|
"step": 2179
|
|
},
|
|
{
|
|
"epoch": 1.1449579831932772,
|
|
"grad_norm": 7.598280634840082,
|
|
"learning_rate": 7.775945129997788e-06,
|
|
"loss": 1.3532328605651855,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 1.1454831932773109,
|
|
"grad_norm": 12.587811480765048,
|
|
"learning_rate": 7.773402842546654e-06,
|
|
"loss": 1.4346599578857422,
|
|
"step": 2181
|
|
},
|
|
{
|
|
"epoch": 1.1460084033613445,
|
|
"grad_norm": 9.462944816682953,
|
|
"learning_rate": 7.770859519031839e-06,
|
|
"loss": 1.7618328332901,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 1.146533613445378,
|
|
"grad_norm": 11.06245957229266,
|
|
"learning_rate": 7.768315160403453e-06,
|
|
"loss": 1.3378257751464844,
|
|
"step": 2183
|
|
},
|
|
{
|
|
"epoch": 1.1470588235294117,
|
|
"grad_norm": 10.404480736254966,
|
|
"learning_rate": 7.765769767611999e-06,
|
|
"loss": 1.3406996726989746,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 1.1475840336134453,
|
|
"grad_norm": 12.673317864361517,
|
|
"learning_rate": 7.76322334160836e-06,
|
|
"loss": 1.298325538635254,
|
|
"step": 2185
|
|
},
|
|
{
|
|
"epoch": 1.148109243697479,
|
|
"grad_norm": 12.277135317633379,
|
|
"learning_rate": 7.76067588334381e-06,
|
|
"loss": 1.5648902654647827,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 1.1486344537815125,
|
|
"grad_norm": 13.71327764667219,
|
|
"learning_rate": 7.758127393770003e-06,
|
|
"loss": 1.9965567588806152,
|
|
"step": 2187
|
|
},
|
|
{
|
|
"epoch": 1.1491596638655461,
|
|
"grad_norm": 10.60434114484367,
|
|
"learning_rate": 7.755577873838985e-06,
|
|
"loss": 1.3782873153686523,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 1.1496848739495797,
|
|
"grad_norm": 18.137455973454465,
|
|
"learning_rate": 7.75302732450318e-06,
|
|
"loss": 1.3186569213867188,
|
|
"step": 2189
|
|
},
|
|
{
|
|
"epoch": 1.1502100840336134,
|
|
"grad_norm": 18.197280725243864,
|
|
"learning_rate": 7.750475746715403e-06,
|
|
"loss": 2.3285152912139893,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 1.150735294117647,
|
|
"grad_norm": 10.493131901404077,
|
|
"learning_rate": 7.747923141428848e-06,
|
|
"loss": 2.311206340789795,
|
|
"step": 2191
|
|
},
|
|
{
|
|
"epoch": 1.1512605042016806,
|
|
"grad_norm": 9.222473958897405,
|
|
"learning_rate": 7.745369509597095e-06,
|
|
"loss": 1.473219633102417,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 1.1517857142857142,
|
|
"grad_norm": 16.959872297960754,
|
|
"learning_rate": 7.742814852174112e-06,
|
|
"loss": 2.3074145317077637,
|
|
"step": 2193
|
|
},
|
|
{
|
|
"epoch": 1.1523109243697478,
|
|
"grad_norm": 8.303568265808774,
|
|
"learning_rate": 7.740259170114239e-06,
|
|
"loss": 2.263456106185913,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 1.1528361344537816,
|
|
"grad_norm": 10.857625163027615,
|
|
"learning_rate": 7.73770246437221e-06,
|
|
"loss": 0.9129188060760498,
|
|
"step": 2195
|
|
},
|
|
{
|
|
"epoch": 1.153361344537815,
|
|
"grad_norm": 8.814366667068162,
|
|
"learning_rate": 7.735144735903136e-06,
|
|
"loss": 1.7062675952911377,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 1.1538865546218489,
|
|
"grad_norm": 19.082552999553837,
|
|
"learning_rate": 7.73258598566251e-06,
|
|
"loss": 2.3475735187530518,
|
|
"step": 2197
|
|
},
|
|
{
|
|
"epoch": 1.1544117647058822,
|
|
"grad_norm": 9.13030709719154,
|
|
"learning_rate": 7.730026214606207e-06,
|
|
"loss": 1.606765866279602,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 1.154936974789916,
|
|
"grad_norm": 9.45085825425194,
|
|
"learning_rate": 7.727465423690487e-06,
|
|
"loss": 1.6220197677612305,
|
|
"step": 2199
|
|
},
|
|
{
|
|
"epoch": 1.1554621848739495,
|
|
"grad_norm": 7.260769089318586,
|
|
"learning_rate": 7.724903613871986e-06,
|
|
"loss": 1.2804646492004395,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 1.1559873949579833,
|
|
"grad_norm": 16.98348371327778,
|
|
"learning_rate": 7.72234078610772e-06,
|
|
"loss": 1.0989782810211182,
|
|
"step": 2201
|
|
},
|
|
{
|
|
"epoch": 1.1565126050420167,
|
|
"grad_norm": 13.348591794056514,
|
|
"learning_rate": 7.719776941355093e-06,
|
|
"loss": 1.6219122409820557,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 1.1570378151260505,
|
|
"grad_norm": 14.205782907159508,
|
|
"learning_rate": 7.71721208057188e-06,
|
|
"loss": 1.6735048294067383,
|
|
"step": 2203
|
|
},
|
|
{
|
|
"epoch": 1.157563025210084,
|
|
"grad_norm": 8.44877838656389,
|
|
"learning_rate": 7.714646204716244e-06,
|
|
"loss": 1.04374361038208,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 1.1580882352941178,
|
|
"grad_norm": 12.324779542535993,
|
|
"learning_rate": 7.712079314746716e-06,
|
|
"loss": 1.0110960006713867,
|
|
"step": 2205
|
|
},
|
|
{
|
|
"epoch": 1.1586134453781511,
|
|
"grad_norm": 7.798643108103532,
|
|
"learning_rate": 7.709511411622216e-06,
|
|
"loss": 1.518144130706787,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 1.159138655462185,
|
|
"grad_norm": 9.864624868476044,
|
|
"learning_rate": 7.706942496302039e-06,
|
|
"loss": 1.8752518892288208,
|
|
"step": 2207
|
|
},
|
|
{
|
|
"epoch": 1.1596638655462184,
|
|
"grad_norm": 8.867600811607666,
|
|
"learning_rate": 7.704372569745857e-06,
|
|
"loss": 1.336355209350586,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 1.1601890756302522,
|
|
"grad_norm": 13.112751487090154,
|
|
"learning_rate": 7.701801632913722e-06,
|
|
"loss": 2.052809953689575,
|
|
"step": 2209
|
|
},
|
|
{
|
|
"epoch": 1.1607142857142858,
|
|
"grad_norm": 11.693376323968234,
|
|
"learning_rate": 7.69922968676606e-06,
|
|
"loss": 1.4469162225723267,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 1.1612394957983194,
|
|
"grad_norm": 10.919647757046446,
|
|
"learning_rate": 7.69665673226368e-06,
|
|
"loss": 1.6109741926193237,
|
|
"step": 2211
|
|
},
|
|
{
|
|
"epoch": 1.161764705882353,
|
|
"grad_norm": 10.553175507569769,
|
|
"learning_rate": 7.69408277036776e-06,
|
|
"loss": 1.6837149858474731,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 1.1622899159663866,
|
|
"grad_norm": 20.045312743210857,
|
|
"learning_rate": 7.691507802039861e-06,
|
|
"loss": 1.4205944538116455,
|
|
"step": 2213
|
|
},
|
|
{
|
|
"epoch": 1.1628151260504203,
|
|
"grad_norm": 11.196244236622643,
|
|
"learning_rate": 7.688931828241916e-06,
|
|
"loss": 1.208276629447937,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 1.1633403361344539,
|
|
"grad_norm": 11.563170174544473,
|
|
"learning_rate": 7.686354849936235e-06,
|
|
"loss": 1.4215166568756104,
|
|
"step": 2215
|
|
},
|
|
{
|
|
"epoch": 1.1638655462184875,
|
|
"grad_norm": 11.349779155393156,
|
|
"learning_rate": 7.683776868085502e-06,
|
|
"loss": 0.8596599102020264,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 1.164390756302521,
|
|
"grad_norm": 10.356674416924188,
|
|
"learning_rate": 7.68119788365278e-06,
|
|
"loss": 1.4210429191589355,
|
|
"step": 2217
|
|
},
|
|
{
|
|
"epoch": 1.1649159663865547,
|
|
"grad_norm": 10.493863437026867,
|
|
"learning_rate": 7.678617897601501e-06,
|
|
"loss": 1.152337908744812,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 1.1654411764705883,
|
|
"grad_norm": 8.61595920869213,
|
|
"learning_rate": 7.676036910895475e-06,
|
|
"loss": 1.4171079397201538,
|
|
"step": 2219
|
|
},
|
|
{
|
|
"epoch": 1.165966386554622,
|
|
"grad_norm": 11.934352631186487,
|
|
"learning_rate": 7.673454924498882e-06,
|
|
"loss": 1.6628780364990234,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 1.1664915966386555,
|
|
"grad_norm": 11.17731569518165,
|
|
"learning_rate": 7.670871939376281e-06,
|
|
"loss": 0.9881746768951416,
|
|
"step": 2221
|
|
},
|
|
{
|
|
"epoch": 1.1670168067226891,
|
|
"grad_norm": 10.279571354553513,
|
|
"learning_rate": 7.668287956492601e-06,
|
|
"loss": 1.43239426612854,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 1.1675420168067228,
|
|
"grad_norm": 9.813795062381661,
|
|
"learning_rate": 7.665702976813142e-06,
|
|
"loss": 1.773823857307434,
|
|
"step": 2223
|
|
},
|
|
{
|
|
"epoch": 1.1680672268907564,
|
|
"grad_norm": 15.557281978419983,
|
|
"learning_rate": 7.663117001303581e-06,
|
|
"loss": 1.0854368209838867,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 1.16859243697479,
|
|
"grad_norm": 16.850206546046667,
|
|
"learning_rate": 7.660530030929961e-06,
|
|
"loss": 1.9169096946716309,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 1.1691176470588236,
|
|
"grad_norm": 8.202714341848928,
|
|
"learning_rate": 7.657942066658701e-06,
|
|
"loss": 1.2993048429489136,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 1.1696428571428572,
|
|
"grad_norm": 10.278018449182921,
|
|
"learning_rate": 7.65535310945659e-06,
|
|
"loss": 1.7205994129180908,
|
|
"step": 2227
|
|
},
|
|
{
|
|
"epoch": 1.1701680672268908,
|
|
"grad_norm": 14.789210296516265,
|
|
"learning_rate": 7.65276316029079e-06,
|
|
"loss": 1.8085758686065674,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 1.1706932773109244,
|
|
"grad_norm": 16.328586878063675,
|
|
"learning_rate": 7.650172220128828e-06,
|
|
"loss": 1.6963579654693604,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 1.171218487394958,
|
|
"grad_norm": 16.158694679158433,
|
|
"learning_rate": 7.647580289938607e-06,
|
|
"loss": 1.520153284072876,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 1.1717436974789917,
|
|
"grad_norm": 14.191398364207553,
|
|
"learning_rate": 7.644987370688399e-06,
|
|
"loss": 1.472973108291626,
|
|
"step": 2231
|
|
},
|
|
{
|
|
"epoch": 1.1722689075630253,
|
|
"grad_norm": 6.213348496719508,
|
|
"learning_rate": 7.642393463346843e-06,
|
|
"loss": 1.6185429096221924,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 1.1727941176470589,
|
|
"grad_norm": 8.854404306709156,
|
|
"learning_rate": 7.639798568882947e-06,
|
|
"loss": 1.7556939125061035,
|
|
"step": 2233
|
|
},
|
|
{
|
|
"epoch": 1.1733193277310925,
|
|
"grad_norm": 9.953899976846554,
|
|
"learning_rate": 7.63720268826609e-06,
|
|
"loss": 1.6055560111999512,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 1.173844537815126,
|
|
"grad_norm": 10.277069630384608,
|
|
"learning_rate": 7.634605822466022e-06,
|
|
"loss": 1.6435586214065552,
|
|
"step": 2235
|
|
},
|
|
{
|
|
"epoch": 1.1743697478991597,
|
|
"grad_norm": 8.86394125184648,
|
|
"learning_rate": 7.632007972452851e-06,
|
|
"loss": 1.2334827184677124,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 1.1748949579831933,
|
|
"grad_norm": 7.773956370147138,
|
|
"learning_rate": 7.629409139197063e-06,
|
|
"loss": 1.8761959075927734,
|
|
"step": 2237
|
|
},
|
|
{
|
|
"epoch": 1.175420168067227,
|
|
"grad_norm": 10.846228404668672,
|
|
"learning_rate": 7.626809323669506e-06,
|
|
"loss": 1.469628095626831,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 1.1759453781512605,
|
|
"grad_norm": 13.28251458961807,
|
|
"learning_rate": 7.624208526841399e-06,
|
|
"loss": 1.7837494611740112,
|
|
"step": 2239
|
|
},
|
|
{
|
|
"epoch": 1.1764705882352942,
|
|
"grad_norm": 7.602935698634584,
|
|
"learning_rate": 7.621606749684323e-06,
|
|
"loss": 0.9383059740066528,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 1.1769957983193278,
|
|
"grad_norm": 8.484095831447153,
|
|
"learning_rate": 7.619003993170226e-06,
|
|
"loss": 1.2930907011032104,
|
|
"step": 2241
|
|
},
|
|
{
|
|
"epoch": 1.1775210084033614,
|
|
"grad_norm": 16.7328276650519,
|
|
"learning_rate": 7.616400258271426e-06,
|
|
"loss": 1.5919756889343262,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 1.178046218487395,
|
|
"grad_norm": 11.454158982382433,
|
|
"learning_rate": 7.613795545960602e-06,
|
|
"loss": 1.1217150688171387,
|
|
"step": 2243
|
|
},
|
|
{
|
|
"epoch": 1.1785714285714286,
|
|
"grad_norm": 18.35984108956994,
|
|
"learning_rate": 7.611189857210801e-06,
|
|
"loss": 1.9384922981262207,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 1.1790966386554622,
|
|
"grad_norm": 11.217826540787774,
|
|
"learning_rate": 7.608583192995433e-06,
|
|
"loss": 1.3692917823791504,
|
|
"step": 2245
|
|
},
|
|
{
|
|
"epoch": 1.1796218487394958,
|
|
"grad_norm": 9.570086657338253,
|
|
"learning_rate": 7.605975554288272e-06,
|
|
"loss": 1.1451338529586792,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 1.1801470588235294,
|
|
"grad_norm": 11.920033506330194,
|
|
"learning_rate": 7.603366942063457e-06,
|
|
"loss": 1.7379136085510254,
|
|
"step": 2247
|
|
},
|
|
{
|
|
"epoch": 1.180672268907563,
|
|
"grad_norm": 9.928107310522863,
|
|
"learning_rate": 7.60075735729549e-06,
|
|
"loss": 1.9680445194244385,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 1.1811974789915967,
|
|
"grad_norm": 10.095289179159145,
|
|
"learning_rate": 7.598146800959238e-06,
|
|
"loss": 1.7297587394714355,
|
|
"step": 2249
|
|
},
|
|
{
|
|
"epoch": 1.1817226890756303,
|
|
"grad_norm": 17.006755815106438,
|
|
"learning_rate": 7.595535274029933e-06,
|
|
"loss": 1.7141847610473633,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 1.1822478991596639,
|
|
"grad_norm": 15.035510539089135,
|
|
"learning_rate": 7.592922777483162e-06,
|
|
"loss": 1.4125642776489258,
|
|
"step": 2251
|
|
},
|
|
{
|
|
"epoch": 1.1827731092436975,
|
|
"grad_norm": 9.331982226354048,
|
|
"learning_rate": 7.590309312294879e-06,
|
|
"loss": 1.4077017307281494,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 1.183298319327731,
|
|
"grad_norm": 11.39061833765179,
|
|
"learning_rate": 7.5876948794414015e-06,
|
|
"loss": 1.595273494720459,
|
|
"step": 2253
|
|
},
|
|
{
|
|
"epoch": 1.1838235294117647,
|
|
"grad_norm": 16.170756474309968,
|
|
"learning_rate": 7.585079479899407e-06,
|
|
"loss": 1.803749918937683,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 1.1843487394957983,
|
|
"grad_norm": 11.416218779228032,
|
|
"learning_rate": 7.58246311464593e-06,
|
|
"loss": 1.7962976694107056,
|
|
"step": 2255
|
|
},
|
|
{
|
|
"epoch": 1.184873949579832,
|
|
"grad_norm": 11.930061478371591,
|
|
"learning_rate": 7.579845784658373e-06,
|
|
"loss": 1.7797696590423584,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 1.1853991596638656,
|
|
"grad_norm": 22.18371416276764,
|
|
"learning_rate": 7.577227490914495e-06,
|
|
"loss": 1.3999063968658447,
|
|
"step": 2257
|
|
},
|
|
{
|
|
"epoch": 1.1859243697478992,
|
|
"grad_norm": 13.512564010610445,
|
|
"learning_rate": 7.5746082343924146e-06,
|
|
"loss": 1.5380282402038574,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 1.1864495798319328,
|
|
"grad_norm": 14.31168018522443,
|
|
"learning_rate": 7.571988016070611e-06,
|
|
"loss": 1.2362651824951172,
|
|
"step": 2259
|
|
},
|
|
{
|
|
"epoch": 1.1869747899159664,
|
|
"grad_norm": 7.639694621889757,
|
|
"learning_rate": 7.569366836927921e-06,
|
|
"loss": 1.8090265989303589,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 1.1875,
|
|
"grad_norm": 8.50071725348861,
|
|
"learning_rate": 7.5667446979435445e-06,
|
|
"loss": 1.4643746614456177,
|
|
"step": 2261
|
|
},
|
|
{
|
|
"epoch": 1.1880252100840336,
|
|
"grad_norm": 7.557482951144118,
|
|
"learning_rate": 7.564121600097037e-06,
|
|
"loss": 1.645832896232605,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 1.1885504201680672,
|
|
"grad_norm": 12.676426625212807,
|
|
"learning_rate": 7.561497544368309e-06,
|
|
"loss": 1.6147786378860474,
|
|
"step": 2263
|
|
},
|
|
{
|
|
"epoch": 1.1890756302521008,
|
|
"grad_norm": 16.807843359095575,
|
|
"learning_rate": 7.558872531737635e-06,
|
|
"loss": 1.0271517038345337,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 1.1896008403361344,
|
|
"grad_norm": 7.801099310746753,
|
|
"learning_rate": 7.556246563185648e-06,
|
|
"loss": 1.6632678508758545,
|
|
"step": 2265
|
|
},
|
|
{
|
|
"epoch": 1.190126050420168,
|
|
"grad_norm": 9.26191272675503,
|
|
"learning_rate": 7.553619639693328e-06,
|
|
"loss": 1.1168607473373413,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 1.1906512605042017,
|
|
"grad_norm": 10.528482369183887,
|
|
"learning_rate": 7.550991762242022e-06,
|
|
"loss": 1.5746139287948608,
|
|
"step": 2267
|
|
},
|
|
{
|
|
"epoch": 1.1911764705882353,
|
|
"grad_norm": 9.81673524549289,
|
|
"learning_rate": 7.5483629318134285e-06,
|
|
"loss": 2.125807762145996,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 1.191701680672269,
|
|
"grad_norm": 10.24554407141191,
|
|
"learning_rate": 7.545733149389605e-06,
|
|
"loss": 1.7941548824310303,
|
|
"step": 2269
|
|
},
|
|
{
|
|
"epoch": 1.1922268907563025,
|
|
"grad_norm": 17.2128839696117,
|
|
"learning_rate": 7.5431024159529585e-06,
|
|
"loss": 1.520341396331787,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 1.1927521008403361,
|
|
"grad_norm": 7.006581734437614,
|
|
"learning_rate": 7.540470732486258e-06,
|
|
"loss": 1.9107754230499268,
|
|
"step": 2271
|
|
},
|
|
{
|
|
"epoch": 1.1932773109243697,
|
|
"grad_norm": 8.43516927711525,
|
|
"learning_rate": 7.537838099972628e-06,
|
|
"loss": 1.8271441459655762,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 1.1938025210084033,
|
|
"grad_norm": 9.194221379236254,
|
|
"learning_rate": 7.535204519395538e-06,
|
|
"loss": 1.4770328998565674,
|
|
"step": 2273
|
|
},
|
|
{
|
|
"epoch": 1.194327731092437,
|
|
"grad_norm": 13.16455808693371,
|
|
"learning_rate": 7.5325699917388226e-06,
|
|
"loss": 2.21441912651062,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 1.1948529411764706,
|
|
"grad_norm": 24.47272132447887,
|
|
"learning_rate": 7.529934517986663e-06,
|
|
"loss": 2.2615766525268555,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 1.1953781512605042,
|
|
"grad_norm": 22.196531362086226,
|
|
"learning_rate": 7.5272980991236015e-06,
|
|
"loss": 1.48537015914917,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 1.1959033613445378,
|
|
"grad_norm": 10.30229607018962,
|
|
"learning_rate": 7.5246607361345215e-06,
|
|
"loss": 0.6189466714859009,
|
|
"step": 2277
|
|
},
|
|
{
|
|
"epoch": 1.1964285714285714,
|
|
"grad_norm": 11.89676089870916,
|
|
"learning_rate": 7.522022430004672e-06,
|
|
"loss": 1.6322544813156128,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 1.196953781512605,
|
|
"grad_norm": 14.66246786517055,
|
|
"learning_rate": 7.519383181719644e-06,
|
|
"loss": 1.8301173448562622,
|
|
"step": 2279
|
|
},
|
|
{
|
|
"epoch": 1.1974789915966386,
|
|
"grad_norm": 12.142503576275619,
|
|
"learning_rate": 7.516742992265389e-06,
|
|
"loss": 1.1744235754013062,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 1.1980042016806722,
|
|
"grad_norm": 13.850062553650906,
|
|
"learning_rate": 7.514101862628203e-06,
|
|
"loss": 1.7453515529632568,
|
|
"step": 2281
|
|
},
|
|
{
|
|
"epoch": 1.1985294117647058,
|
|
"grad_norm": 23.510428274436297,
|
|
"learning_rate": 7.511459793794736e-06,
|
|
"loss": 2.7380056381225586,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 1.1990546218487395,
|
|
"grad_norm": 11.481471300399695,
|
|
"learning_rate": 7.508816786751991e-06,
|
|
"loss": 1.2900176048278809,
|
|
"step": 2283
|
|
},
|
|
{
|
|
"epoch": 1.199579831932773,
|
|
"grad_norm": 8.807838434433554,
|
|
"learning_rate": 7.506172842487321e-06,
|
|
"loss": 1.7967376708984375,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 1.2001050420168067,
|
|
"grad_norm": 11.144029341780646,
|
|
"learning_rate": 7.503527961988422e-06,
|
|
"loss": 1.5620723962783813,
|
|
"step": 2285
|
|
},
|
|
{
|
|
"epoch": 1.2006302521008403,
|
|
"grad_norm": 13.161074191782578,
|
|
"learning_rate": 7.500882146243349e-06,
|
|
"loss": 1.4015578031539917,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 1.201155462184874,
|
|
"grad_norm": 11.326606985210484,
|
|
"learning_rate": 7.498235396240505e-06,
|
|
"loss": 1.5541796684265137,
|
|
"step": 2287
|
|
},
|
|
{
|
|
"epoch": 1.2016806722689075,
|
|
"grad_norm": 11.165684930086023,
|
|
"learning_rate": 7.495587712968637e-06,
|
|
"loss": 1.4527666568756104,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 1.2022058823529411,
|
|
"grad_norm": 10.299875594492086,
|
|
"learning_rate": 7.492939097416842e-06,
|
|
"loss": 1.3378864526748657,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 1.2027310924369747,
|
|
"grad_norm": 12.416826127313424,
|
|
"learning_rate": 7.49028955057457e-06,
|
|
"loss": 1.7873291969299316,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 1.2032563025210083,
|
|
"grad_norm": 7.324940705463291,
|
|
"learning_rate": 7.487639073431615e-06,
|
|
"loss": 0.9052544832229614,
|
|
"step": 2291
|
|
},
|
|
{
|
|
"epoch": 1.203781512605042,
|
|
"grad_norm": 11.160324785999983,
|
|
"learning_rate": 7.4849876669781175e-06,
|
|
"loss": 1.3971418142318726,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 1.2043067226890756,
|
|
"grad_norm": 9.812802069170903,
|
|
"learning_rate": 7.482335332204568e-06,
|
|
"loss": 1.3695425987243652,
|
|
"step": 2293
|
|
},
|
|
{
|
|
"epoch": 1.2048319327731092,
|
|
"grad_norm": 24.923735606326765,
|
|
"learning_rate": 7.4796820701018025e-06,
|
|
"loss": 1.7986787557601929,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 1.2053571428571428,
|
|
"grad_norm": 25.29305918756146,
|
|
"learning_rate": 7.477027881661003e-06,
|
|
"loss": 1.5392940044403076,
|
|
"step": 2295
|
|
},
|
|
{
|
|
"epoch": 1.2058823529411764,
|
|
"grad_norm": 11.143561320900542,
|
|
"learning_rate": 7.4743727678737e-06,
|
|
"loss": 1.5945121049880981,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 1.20640756302521,
|
|
"grad_norm": 20.283242988904952,
|
|
"learning_rate": 7.471716729731764e-06,
|
|
"loss": 1.3238778114318848,
|
|
"step": 2297
|
|
},
|
|
{
|
|
"epoch": 1.2069327731092436,
|
|
"grad_norm": 14.612581857378458,
|
|
"learning_rate": 7.469059768227419e-06,
|
|
"loss": 1.3909658193588257,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 1.2074579831932772,
|
|
"grad_norm": 13.84989681165633,
|
|
"learning_rate": 7.466401884353227e-06,
|
|
"loss": 1.689344882965088,
|
|
"step": 2299
|
|
},
|
|
{
|
|
"epoch": 1.2079831932773109,
|
|
"grad_norm": 15.509073178148029,
|
|
"learning_rate": 7.4637430791020974e-06,
|
|
"loss": 1.5234793424606323,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 1.2085084033613445,
|
|
"grad_norm": 13.34196354051738,
|
|
"learning_rate": 7.461083353467283e-06,
|
|
"loss": 1.6402065753936768,
|
|
"step": 2301
|
|
},
|
|
{
|
|
"epoch": 1.209033613445378,
|
|
"grad_norm": 15.28186026868191,
|
|
"learning_rate": 7.458422708442382e-06,
|
|
"loss": 1.3864027261734009,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 1.2095588235294117,
|
|
"grad_norm": 11.622612511011315,
|
|
"learning_rate": 7.455761145021335e-06,
|
|
"loss": 1.594147801399231,
|
|
"step": 2303
|
|
},
|
|
{
|
|
"epoch": 1.2100840336134453,
|
|
"grad_norm": 12.765662852247218,
|
|
"learning_rate": 7.453098664198426e-06,
|
|
"loss": 1.1258426904678345,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 1.210609243697479,
|
|
"grad_norm": 14.58211207292688,
|
|
"learning_rate": 7.450435266968279e-06,
|
|
"loss": 1.759843349456787,
|
|
"step": 2305
|
|
},
|
|
{
|
|
"epoch": 1.2111344537815125,
|
|
"grad_norm": 15.514594495150906,
|
|
"learning_rate": 7.447770954325866e-06,
|
|
"loss": 1.5733953714370728,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 1.2116596638655461,
|
|
"grad_norm": 12.880778086443373,
|
|
"learning_rate": 7.445105727266496e-06,
|
|
"loss": 2.0581679344177246,
|
|
"step": 2307
|
|
},
|
|
{
|
|
"epoch": 1.2121848739495797,
|
|
"grad_norm": 15.249229861220583,
|
|
"learning_rate": 7.4424395867858224e-06,
|
|
"loss": 1.3483026027679443,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 1.2127100840336134,
|
|
"grad_norm": 10.515358810176897,
|
|
"learning_rate": 7.4397725338798365e-06,
|
|
"loss": 1.224669337272644,
|
|
"step": 2309
|
|
},
|
|
{
|
|
"epoch": 1.213235294117647,
|
|
"grad_norm": 13.802263456031447,
|
|
"learning_rate": 7.437104569544874e-06,
|
|
"loss": 1.6141116619110107,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 1.2137605042016806,
|
|
"grad_norm": 12.138170482009528,
|
|
"learning_rate": 7.4344356947776106e-06,
|
|
"loss": 1.521715760231018,
|
|
"step": 2311
|
|
},
|
|
{
|
|
"epoch": 1.2142857142857142,
|
|
"grad_norm": 9.396927032693675,
|
|
"learning_rate": 7.431765910575061e-06,
|
|
"loss": 1.1617248058319092,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 1.2148109243697478,
|
|
"grad_norm": 14.34054360977382,
|
|
"learning_rate": 7.429095217934578e-06,
|
|
"loss": 1.523795247077942,
|
|
"step": 2313
|
|
},
|
|
{
|
|
"epoch": 1.2153361344537816,
|
|
"grad_norm": 9.817340736775819,
|
|
"learning_rate": 7.426423617853858e-06,
|
|
"loss": 1.5130796432495117,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 1.215861344537815,
|
|
"grad_norm": 13.588005997380902,
|
|
"learning_rate": 7.423751111330933e-06,
|
|
"loss": 1.6209462881088257,
|
|
"step": 2315
|
|
},
|
|
{
|
|
"epoch": 1.2163865546218489,
|
|
"grad_norm": 10.733814846402563,
|
|
"learning_rate": 7.421077699364174e-06,
|
|
"loss": 1.9656691551208496,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 1.2169117647058822,
|
|
"grad_norm": 15.954637512455726,
|
|
"learning_rate": 7.4184033829522935e-06,
|
|
"loss": 1.733542561531067,
|
|
"step": 2317
|
|
},
|
|
{
|
|
"epoch": 1.217436974789916,
|
|
"grad_norm": 20.652914552902658,
|
|
"learning_rate": 7.415728163094338e-06,
|
|
"loss": 2.0990822315216064,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 1.2179621848739495,
|
|
"grad_norm": 18.289704594032273,
|
|
"learning_rate": 7.413052040789692e-06,
|
|
"loss": 2.069448709487915,
|
|
"step": 2319
|
|
},
|
|
{
|
|
"epoch": 1.2184873949579833,
|
|
"grad_norm": 10.974487038682968,
|
|
"learning_rate": 7.410375017038078e-06,
|
|
"loss": 1.6104955673217773,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 1.2190126050420167,
|
|
"grad_norm": 7.259922667918353,
|
|
"learning_rate": 7.4076970928395565e-06,
|
|
"loss": 1.2447270154953003,
|
|
"step": 2321
|
|
},
|
|
{
|
|
"epoch": 1.2195378151260505,
|
|
"grad_norm": 12.022730022541118,
|
|
"learning_rate": 7.405018269194522e-06,
|
|
"loss": 1.1101574897766113,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 1.220063025210084,
|
|
"grad_norm": 9.148123641674003,
|
|
"learning_rate": 7.402338547103708e-06,
|
|
"loss": 1.99159574508667,
|
|
"step": 2323
|
|
},
|
|
{
|
|
"epoch": 1.2205882352941178,
|
|
"grad_norm": 11.43638906210396,
|
|
"learning_rate": 7.399657927568178e-06,
|
|
"loss": 1.485155701637268,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 1.2211134453781511,
|
|
"grad_norm": 10.90216510145063,
|
|
"learning_rate": 7.396976411589338e-06,
|
|
"loss": 1.0308756828308105,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 1.221638655462185,
|
|
"grad_norm": 12.655532474708437,
|
|
"learning_rate": 7.3942940001689245e-06,
|
|
"loss": 1.5401026010513306,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 1.2221638655462184,
|
|
"grad_norm": 9.194874093328167,
|
|
"learning_rate": 7.391610694309008e-06,
|
|
"loss": 1.2197831869125366,
|
|
"step": 2327
|
|
},
|
|
{
|
|
"epoch": 1.2226890756302522,
|
|
"grad_norm": 14.753605984197215,
|
|
"learning_rate": 7.388926495011996e-06,
|
|
"loss": 2.316397190093994,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 1.2232142857142858,
|
|
"grad_norm": 9.772386958492579,
|
|
"learning_rate": 7.386241403280629e-06,
|
|
"loss": 1.372429609298706,
|
|
"step": 2329
|
|
},
|
|
{
|
|
"epoch": 1.2237394957983194,
|
|
"grad_norm": 10.631382394346844,
|
|
"learning_rate": 7.3835554201179785e-06,
|
|
"loss": 1.5177174806594849,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 1.224264705882353,
|
|
"grad_norm": 10.98341862924508,
|
|
"learning_rate": 7.380868546527449e-06,
|
|
"loss": 1.1611489057540894,
|
|
"step": 2331
|
|
},
|
|
{
|
|
"epoch": 1.2247899159663866,
|
|
"grad_norm": 12.532395756792724,
|
|
"learning_rate": 7.378180783512784e-06,
|
|
"loss": 1.8947821855545044,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 1.2253151260504203,
|
|
"grad_norm": 12.089171708399894,
|
|
"learning_rate": 7.375492132078051e-06,
|
|
"loss": 1.257406234741211,
|
|
"step": 2333
|
|
},
|
|
{
|
|
"epoch": 1.2258403361344539,
|
|
"grad_norm": 13.513579765433992,
|
|
"learning_rate": 7.372802593227656e-06,
|
|
"loss": 1.256013035774231,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 1.2263655462184875,
|
|
"grad_norm": 12.853310916075031,
|
|
"learning_rate": 7.3701121679663305e-06,
|
|
"loss": 1.4088852405548096,
|
|
"step": 2335
|
|
},
|
|
{
|
|
"epoch": 1.226890756302521,
|
|
"grad_norm": 8.70224293067176,
|
|
"learning_rate": 7.36742085729914e-06,
|
|
"loss": 2.04591965675354,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 1.2274159663865547,
|
|
"grad_norm": 13.45383616067398,
|
|
"learning_rate": 7.364728662231484e-06,
|
|
"loss": 1.4291229248046875,
|
|
"step": 2337
|
|
},
|
|
{
|
|
"epoch": 1.2279411764705883,
|
|
"grad_norm": 18.310273006141156,
|
|
"learning_rate": 7.362035583769087e-06,
|
|
"loss": 1.1931278705596924,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 1.228466386554622,
|
|
"grad_norm": 7.059913921580223,
|
|
"learning_rate": 7.359341622918006e-06,
|
|
"loss": 1.6950139999389648,
|
|
"step": 2339
|
|
},
|
|
{
|
|
"epoch": 1.2289915966386555,
|
|
"grad_norm": 17.606553957763804,
|
|
"learning_rate": 7.356646780684629e-06,
|
|
"loss": 2.0042810440063477,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 1.2295168067226891,
|
|
"grad_norm": 14.03085666214147,
|
|
"learning_rate": 7.353951058075669e-06,
|
|
"loss": 1.4454351663589478,
|
|
"step": 2341
|
|
},
|
|
{
|
|
"epoch": 1.2300420168067228,
|
|
"grad_norm": 8.559323919901624,
|
|
"learning_rate": 7.351254456098172e-06,
|
|
"loss": 1.7152481079101562,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 1.2305672268907564,
|
|
"grad_norm": 11.824404222001439,
|
|
"learning_rate": 7.348556975759512e-06,
|
|
"loss": 1.5654159784317017,
|
|
"step": 2343
|
|
},
|
|
{
|
|
"epoch": 1.23109243697479,
|
|
"grad_norm": 10.644263196876427,
|
|
"learning_rate": 7.34585861806739e-06,
|
|
"loss": 1.801296591758728,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 1.2316176470588236,
|
|
"grad_norm": 13.96733594110418,
|
|
"learning_rate": 7.343159384029833e-06,
|
|
"loss": 1.6108777523040771,
|
|
"step": 2345
|
|
},
|
|
{
|
|
"epoch": 1.2321428571428572,
|
|
"grad_norm": 14.731768021182264,
|
|
"learning_rate": 7.340459274655198e-06,
|
|
"loss": 1.9901306629180908,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 1.2326680672268908,
|
|
"grad_norm": 16.95908361334622,
|
|
"learning_rate": 7.3377582909521705e-06,
|
|
"loss": 1.535915493965149,
|
|
"step": 2347
|
|
},
|
|
{
|
|
"epoch": 1.2331932773109244,
|
|
"grad_norm": 22.4332799229794,
|
|
"learning_rate": 7.335056433929758e-06,
|
|
"loss": 3.0728158950805664,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 1.233718487394958,
|
|
"grad_norm": 14.20740872874293,
|
|
"learning_rate": 7.332353704597299e-06,
|
|
"loss": 1.109440565109253,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 1.2342436974789917,
|
|
"grad_norm": 8.476647453494687,
|
|
"learning_rate": 7.3296501039644515e-06,
|
|
"loss": 1.7299296855926514,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 1.2347689075630253,
|
|
"grad_norm": 11.261603136272209,
|
|
"learning_rate": 7.326945633041209e-06,
|
|
"loss": 1.9006025791168213,
|
|
"step": 2351
|
|
},
|
|
{
|
|
"epoch": 1.2352941176470589,
|
|
"grad_norm": 11.608106477800181,
|
|
"learning_rate": 7.32424029283788e-06,
|
|
"loss": 1.1014738082885742,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 1.2358193277310925,
|
|
"grad_norm": 7.4838832637857235,
|
|
"learning_rate": 7.321534084365101e-06,
|
|
"loss": 1.101230502128601,
|
|
"step": 2353
|
|
},
|
|
{
|
|
"epoch": 1.236344537815126,
|
|
"grad_norm": 10.786551368424025,
|
|
"learning_rate": 7.318827008633837e-06,
|
|
"loss": 1.6727733612060547,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 1.2368697478991597,
|
|
"grad_norm": 8.95980030612558,
|
|
"learning_rate": 7.316119066655374e-06,
|
|
"loss": 1.7786109447479248,
|
|
"step": 2355
|
|
},
|
|
{
|
|
"epoch": 1.2373949579831933,
|
|
"grad_norm": 10.129048303462964,
|
|
"learning_rate": 7.31341025944132e-06,
|
|
"loss": 1.322527527809143,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 1.237920168067227,
|
|
"grad_norm": 9.381053963879273,
|
|
"learning_rate": 7.310700588003605e-06,
|
|
"loss": 1.10860276222229,
|
|
"step": 2357
|
|
},
|
|
{
|
|
"epoch": 1.2384453781512605,
|
|
"grad_norm": 9.653449934052412,
|
|
"learning_rate": 7.307990053354489e-06,
|
|
"loss": 1.2966623306274414,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 1.2389705882352942,
|
|
"grad_norm": 14.132126149032192,
|
|
"learning_rate": 7.305278656506547e-06,
|
|
"loss": 1.5544002056121826,
|
|
"step": 2359
|
|
},
|
|
{
|
|
"epoch": 1.2394957983193278,
|
|
"grad_norm": 22.343298440599415,
|
|
"learning_rate": 7.3025663984726804e-06,
|
|
"loss": 1.3582324981689453,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 1.2400210084033614,
|
|
"grad_norm": 15.390441555017505,
|
|
"learning_rate": 7.299853280266109e-06,
|
|
"loss": 2.1058707237243652,
|
|
"step": 2361
|
|
},
|
|
{
|
|
"epoch": 1.240546218487395,
|
|
"grad_norm": 15.624905181753281,
|
|
"learning_rate": 7.29713930290038e-06,
|
|
"loss": 1.9919989109039307,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 1.2410714285714286,
|
|
"grad_norm": 8.29412970289628,
|
|
"learning_rate": 7.294424467389354e-06,
|
|
"loss": 1.880364179611206,
|
|
"step": 2363
|
|
},
|
|
{
|
|
"epoch": 1.2415966386554622,
|
|
"grad_norm": 11.708658580648029,
|
|
"learning_rate": 7.291708774747215e-06,
|
|
"loss": 1.5461198091506958,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 1.2421218487394958,
|
|
"grad_norm": 10.28464568270922,
|
|
"learning_rate": 7.28899222598847e-06,
|
|
"loss": 2.2227697372436523,
|
|
"step": 2365
|
|
},
|
|
{
|
|
"epoch": 1.2426470588235294,
|
|
"grad_norm": 9.68029298731685,
|
|
"learning_rate": 7.286274822127943e-06,
|
|
"loss": 1.1307097673416138,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 1.243172268907563,
|
|
"grad_norm": 12.22929327621289,
|
|
"learning_rate": 7.2835565641807784e-06,
|
|
"loss": 1.349056601524353,
|
|
"step": 2367
|
|
},
|
|
{
|
|
"epoch": 1.2436974789915967,
|
|
"grad_norm": 8.766422223819754,
|
|
"learning_rate": 7.280837453162437e-06,
|
|
"loss": 2.0873913764953613,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 1.2442226890756303,
|
|
"grad_norm": 7.639382297018337,
|
|
"learning_rate": 7.278117490088703e-06,
|
|
"loss": 1.324272871017456,
|
|
"step": 2369
|
|
},
|
|
{
|
|
"epoch": 1.2447478991596639,
|
|
"grad_norm": 10.350289263510936,
|
|
"learning_rate": 7.2753966759756775e-06,
|
|
"loss": 1.8540616035461426,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 1.2452731092436975,
|
|
"grad_norm": 9.468841605534482,
|
|
"learning_rate": 7.272675011839776e-06,
|
|
"loss": 2.102241039276123,
|
|
"step": 2371
|
|
},
|
|
{
|
|
"epoch": 1.245798319327731,
|
|
"grad_norm": 9.032877181742217,
|
|
"learning_rate": 7.269952498697734e-06,
|
|
"loss": 1.2577991485595703,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 1.2463235294117647,
|
|
"grad_norm": 12.83783323017104,
|
|
"learning_rate": 7.267229137566607e-06,
|
|
"loss": 1.348679780960083,
|
|
"step": 2373
|
|
},
|
|
{
|
|
"epoch": 1.2468487394957983,
|
|
"grad_norm": 16.509721885073024,
|
|
"learning_rate": 7.2645049294637625e-06,
|
|
"loss": 1.8049670457839966,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 1.247373949579832,
|
|
"grad_norm": 7.679108219991571,
|
|
"learning_rate": 7.261779875406887e-06,
|
|
"loss": 1.522689938545227,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 1.2478991596638656,
|
|
"grad_norm": 7.993956714427982,
|
|
"learning_rate": 7.259053976413981e-06,
|
|
"loss": 1.6695988178253174,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 1.2484243697478992,
|
|
"grad_norm": 11.598346916315906,
|
|
"learning_rate": 7.256327233503365e-06,
|
|
"loss": 1.1637938022613525,
|
|
"step": 2377
|
|
},
|
|
{
|
|
"epoch": 1.2489495798319328,
|
|
"grad_norm": 19.863618347328224,
|
|
"learning_rate": 7.2535996476936696e-06,
|
|
"loss": 1.5508743524551392,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 1.2494747899159664,
|
|
"grad_norm": 19.81622027458916,
|
|
"learning_rate": 7.2508712200038435e-06,
|
|
"loss": 1.48831307888031,
|
|
"step": 2379
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 11.056709659977496,
|
|
"learning_rate": 7.248141951453148e-06,
|
|
"loss": 1.6738970279693604,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 1.2505252100840336,
|
|
"grad_norm": 7.266503881384918,
|
|
"learning_rate": 7.24541184306116e-06,
|
|
"loss": 1.417891025543213,
|
|
"step": 2381
|
|
},
|
|
{
|
|
"epoch": 1.2510504201680672,
|
|
"grad_norm": 9.303065882150086,
|
|
"learning_rate": 7.24268089584777e-06,
|
|
"loss": 2.12142276763916,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 1.2515756302521008,
|
|
"grad_norm": 6.535764656682369,
|
|
"learning_rate": 7.239949110833182e-06,
|
|
"loss": 1.5944756269454956,
|
|
"step": 2383
|
|
},
|
|
{
|
|
"epoch": 1.2521008403361344,
|
|
"grad_norm": 22.395338083691897,
|
|
"learning_rate": 7.2372164890379106e-06,
|
|
"loss": 1.6734291315078735,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 1.252626050420168,
|
|
"grad_norm": 10.720706252841383,
|
|
"learning_rate": 7.234483031482787e-06,
|
|
"loss": 1.893827199935913,
|
|
"step": 2385
|
|
},
|
|
{
|
|
"epoch": 1.2531512605042017,
|
|
"grad_norm": 8.158464787827441,
|
|
"learning_rate": 7.231748739188951e-06,
|
|
"loss": 1.8776541948318481,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 1.2536764705882353,
|
|
"grad_norm": 10.89127995232285,
|
|
"learning_rate": 7.229013613177856e-06,
|
|
"loss": 1.3742156028747559,
|
|
"step": 2387
|
|
},
|
|
{
|
|
"epoch": 1.254201680672269,
|
|
"grad_norm": 8.674192807216064,
|
|
"learning_rate": 7.2262776544712665e-06,
|
|
"loss": 0.8402234315872192,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 1.2547268907563025,
|
|
"grad_norm": 13.504420594820457,
|
|
"learning_rate": 7.223540864091259e-06,
|
|
"loss": 1.623948097229004,
|
|
"step": 2389
|
|
},
|
|
{
|
|
"epoch": 1.2552521008403361,
|
|
"grad_norm": 10.586645126393956,
|
|
"learning_rate": 7.2208032430602185e-06,
|
|
"loss": 1.0190811157226562,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 1.2557773109243697,
|
|
"grad_norm": 12.57602406447126,
|
|
"learning_rate": 7.218064792400842e-06,
|
|
"loss": 1.052843451499939,
|
|
"step": 2391
|
|
},
|
|
{
|
|
"epoch": 1.2563025210084033,
|
|
"grad_norm": 6.133078075000218,
|
|
"learning_rate": 7.215325513136137e-06,
|
|
"loss": 1.6767473220825195,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 1.256827731092437,
|
|
"grad_norm": 10.340515409303494,
|
|
"learning_rate": 7.2125854062894184e-06,
|
|
"loss": 1.3847999572753906,
|
|
"step": 2393
|
|
},
|
|
{
|
|
"epoch": 1.2573529411764706,
|
|
"grad_norm": 11.643403558581834,
|
|
"learning_rate": 7.209844472884313e-06,
|
|
"loss": 2.0188345909118652,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 1.2578781512605042,
|
|
"grad_norm": 10.25441324221315,
|
|
"learning_rate": 7.207102713944752e-06,
|
|
"loss": 1.5930418968200684,
|
|
"step": 2395
|
|
},
|
|
{
|
|
"epoch": 1.2584033613445378,
|
|
"grad_norm": 14.36265003017186,
|
|
"learning_rate": 7.204360130494981e-06,
|
|
"loss": 1.1003354787826538,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 1.2589285714285714,
|
|
"grad_norm": 9.903410270466072,
|
|
"learning_rate": 7.201616723559548e-06,
|
|
"loss": 1.4389723539352417,
|
|
"step": 2397
|
|
},
|
|
{
|
|
"epoch": 1.259453781512605,
|
|
"grad_norm": 16.36601934860646,
|
|
"learning_rate": 7.198872494163312e-06,
|
|
"loss": 2.5507800579071045,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 1.2599789915966386,
|
|
"grad_norm": 15.286978689183501,
|
|
"learning_rate": 7.19612744333144e-06,
|
|
"loss": 2.1392221450805664,
|
|
"step": 2399
|
|
},
|
|
{
|
|
"epoch": 1.2605042016806722,
|
|
"grad_norm": 17.465508677471693,
|
|
"learning_rate": 7.193381572089402e-06,
|
|
"loss": 1.398755669593811,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 1.2610294117647058,
|
|
"grad_norm": 11.700706016241789,
|
|
"learning_rate": 7.190634881462976e-06,
|
|
"loss": 1.5313366651535034,
|
|
"step": 2401
|
|
},
|
|
{
|
|
"epoch": 1.2615546218487395,
|
|
"grad_norm": 11.195516448088934,
|
|
"learning_rate": 7.18788737247825e-06,
|
|
"loss": 1.6762734651565552,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 1.262079831932773,
|
|
"grad_norm": 10.082021172255459,
|
|
"learning_rate": 7.185139046161611e-06,
|
|
"loss": 1.2127187252044678,
|
|
"step": 2403
|
|
},
|
|
{
|
|
"epoch": 1.2626050420168067,
|
|
"grad_norm": 13.058806553080483,
|
|
"learning_rate": 7.182389903539757e-06,
|
|
"loss": 1.383756160736084,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 1.2631302521008403,
|
|
"grad_norm": 22.872980267738292,
|
|
"learning_rate": 7.179639945639688e-06,
|
|
"loss": 1.0362639427185059,
|
|
"step": 2405
|
|
},
|
|
{
|
|
"epoch": 1.263655462184874,
|
|
"grad_norm": 13.011234775175142,
|
|
"learning_rate": 7.1768891734887095e-06,
|
|
"loss": 2.003307342529297,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 1.2641806722689075,
|
|
"grad_norm": 13.357718438880962,
|
|
"learning_rate": 7.174137588114432e-06,
|
|
"loss": 1.7289718389511108,
|
|
"step": 2407
|
|
},
|
|
{
|
|
"epoch": 1.2647058823529411,
|
|
"grad_norm": 8.875762013574194,
|
|
"learning_rate": 7.171385190544766e-06,
|
|
"loss": 1.2428697347640991,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 1.2652310924369747,
|
|
"grad_norm": 13.211830534036327,
|
|
"learning_rate": 7.168631981807931e-06,
|
|
"loss": 1.3453094959259033,
|
|
"step": 2409
|
|
},
|
|
{
|
|
"epoch": 1.2657563025210083,
|
|
"grad_norm": 17.40159836218747,
|
|
"learning_rate": 7.165877962932444e-06,
|
|
"loss": 2.3050365447998047,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 1.266281512605042,
|
|
"grad_norm": 9.799835084766892,
|
|
"learning_rate": 7.1631231349471306e-06,
|
|
"loss": 2.27128267288208,
|
|
"step": 2411
|
|
},
|
|
{
|
|
"epoch": 1.2668067226890756,
|
|
"grad_norm": 16.14197538278971,
|
|
"learning_rate": 7.160367498881113e-06,
|
|
"loss": 1.8228071928024292,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 1.2673319327731092,
|
|
"grad_norm": 15.260337123016297,
|
|
"learning_rate": 7.15761105576382e-06,
|
|
"loss": 1.5178961753845215,
|
|
"step": 2413
|
|
},
|
|
{
|
|
"epoch": 1.2678571428571428,
|
|
"grad_norm": 8.091406049823233,
|
|
"learning_rate": 7.1548538066249776e-06,
|
|
"loss": 1.494361400604248,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 1.2683823529411764,
|
|
"grad_norm": 12.0098589479205,
|
|
"learning_rate": 7.152095752494616e-06,
|
|
"loss": 1.3326354026794434,
|
|
"step": 2415
|
|
},
|
|
{
|
|
"epoch": 1.26890756302521,
|
|
"grad_norm": 9.095525866005758,
|
|
"learning_rate": 7.149336894403064e-06,
|
|
"loss": 0.7168285250663757,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 1.2694327731092436,
|
|
"grad_norm": 14.397071140794958,
|
|
"learning_rate": 7.1465772333809524e-06,
|
|
"loss": 1.0425605773925781,
|
|
"step": 2417
|
|
},
|
|
{
|
|
"epoch": 1.2699579831932772,
|
|
"grad_norm": 9.501871797516532,
|
|
"learning_rate": 7.143816770459211e-06,
|
|
"loss": 1.8473377227783203,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 1.2704831932773109,
|
|
"grad_norm": 11.960339149604845,
|
|
"learning_rate": 7.141055506669072e-06,
|
|
"loss": 1.3001151084899902,
|
|
"step": 2419
|
|
},
|
|
{
|
|
"epoch": 1.2710084033613445,
|
|
"grad_norm": 12.42609961220722,
|
|
"learning_rate": 7.13829344304206e-06,
|
|
"loss": 1.3024431467056274,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 1.271533613445378,
|
|
"grad_norm": 15.451523377574512,
|
|
"learning_rate": 7.1355305806100036e-06,
|
|
"loss": 2.631690263748169,
|
|
"step": 2421
|
|
},
|
|
{
|
|
"epoch": 1.2720588235294117,
|
|
"grad_norm": 14.468136365218367,
|
|
"learning_rate": 7.132766920405033e-06,
|
|
"loss": 2.0492539405822754,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 1.2725840336134453,
|
|
"grad_norm": 7.644834895154859,
|
|
"learning_rate": 7.130002463459569e-06,
|
|
"loss": 1.053494930267334,
|
|
"step": 2423
|
|
},
|
|
{
|
|
"epoch": 1.273109243697479,
|
|
"grad_norm": 9.407528896608309,
|
|
"learning_rate": 7.1272372108063315e-06,
|
|
"loss": 1.9194227457046509,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 1.2736344537815127,
|
|
"grad_norm": 15.39001144716507,
|
|
"learning_rate": 7.124471163478344e-06,
|
|
"loss": 1.0588860511779785,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 1.2741596638655461,
|
|
"grad_norm": 14.90338302375827,
|
|
"learning_rate": 7.1217043225089196e-06,
|
|
"loss": 1.0698069334030151,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 1.27468487394958,
|
|
"grad_norm": 19.89454204903591,
|
|
"learning_rate": 7.118936688931672e-06,
|
|
"loss": 1.8833686113357544,
|
|
"step": 2427
|
|
},
|
|
{
|
|
"epoch": 1.2752100840336134,
|
|
"grad_norm": 13.361454987704288,
|
|
"learning_rate": 7.1161682637805065e-06,
|
|
"loss": 1.7349896430969238,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 1.2757352941176472,
|
|
"grad_norm": 9.598161833500898,
|
|
"learning_rate": 7.113399048089631e-06,
|
|
"loss": 1.980614185333252,
|
|
"step": 2429
|
|
},
|
|
{
|
|
"epoch": 1.2762605042016806,
|
|
"grad_norm": 15.600245357473879,
|
|
"learning_rate": 7.110629042893543e-06,
|
|
"loss": 1.5294065475463867,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 1.2767857142857144,
|
|
"grad_norm": 18.83154035782719,
|
|
"learning_rate": 7.1078582492270385e-06,
|
|
"loss": 1.830521583557129,
|
|
"step": 2431
|
|
},
|
|
{
|
|
"epoch": 1.2773109243697478,
|
|
"grad_norm": 9.997210701856849,
|
|
"learning_rate": 7.105086668125205e-06,
|
|
"loss": 2.4552969932556152,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 1.2778361344537816,
|
|
"grad_norm": 10.710851096646278,
|
|
"learning_rate": 7.102314300623425e-06,
|
|
"loss": 1.468703031539917,
|
|
"step": 2433
|
|
},
|
|
{
|
|
"epoch": 1.278361344537815,
|
|
"grad_norm": 18.498009820053685,
|
|
"learning_rate": 7.0995411477573786e-06,
|
|
"loss": 3.8373215198516846,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 1.2788865546218489,
|
|
"grad_norm": 13.547066904733787,
|
|
"learning_rate": 7.096767210563031e-06,
|
|
"loss": 1.225649118423462,
|
|
"step": 2435
|
|
},
|
|
{
|
|
"epoch": 1.2794117647058822,
|
|
"grad_norm": 15.803823739293396,
|
|
"learning_rate": 7.093992490076652e-06,
|
|
"loss": 1.571283221244812,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 1.279936974789916,
|
|
"grad_norm": 12.594173306429065,
|
|
"learning_rate": 7.091216987334792e-06,
|
|
"loss": 1.723379135131836,
|
|
"step": 2437
|
|
},
|
|
{
|
|
"epoch": 1.2804621848739495,
|
|
"grad_norm": 8.408963976459619,
|
|
"learning_rate": 7.088440703374302e-06,
|
|
"loss": 1.1697720289230347,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 1.2809873949579833,
|
|
"grad_norm": 8.255317949054,
|
|
"learning_rate": 7.0856636392323205e-06,
|
|
"loss": 1.266052484512329,
|
|
"step": 2439
|
|
},
|
|
{
|
|
"epoch": 1.2815126050420167,
|
|
"grad_norm": 27.24276770547867,
|
|
"learning_rate": 7.08288579594628e-06,
|
|
"loss": 1.7574093341827393,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 1.2820378151260505,
|
|
"grad_norm": 15.15257145767664,
|
|
"learning_rate": 7.080107174553903e-06,
|
|
"loss": 2.3162498474121094,
|
|
"step": 2441
|
|
},
|
|
{
|
|
"epoch": 1.282563025210084,
|
|
"grad_norm": 15.105360916116396,
|
|
"learning_rate": 7.0773277760932015e-06,
|
|
"loss": 1.3757708072662354,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 1.2830882352941178,
|
|
"grad_norm": 12.230471977015398,
|
|
"learning_rate": 7.074547601602479e-06,
|
|
"loss": 1.2318499088287354,
|
|
"step": 2443
|
|
},
|
|
{
|
|
"epoch": 1.2836134453781511,
|
|
"grad_norm": 10.457889294630673,
|
|
"learning_rate": 7.071766652120331e-06,
|
|
"loss": 2.0742783546447754,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 1.284138655462185,
|
|
"grad_norm": 23.0353340887712,
|
|
"learning_rate": 7.068984928685638e-06,
|
|
"loss": 2.8480892181396484,
|
|
"step": 2445
|
|
},
|
|
{
|
|
"epoch": 1.2846638655462184,
|
|
"grad_norm": 9.600151102945814,
|
|
"learning_rate": 7.0662024323375745e-06,
|
|
"loss": 1.3152753114700317,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 1.2851890756302522,
|
|
"grad_norm": 17.008552694520134,
|
|
"learning_rate": 7.063419164115598e-06,
|
|
"loss": 1.9276971817016602,
|
|
"step": 2447
|
|
},
|
|
{
|
|
"epoch": 1.2857142857142856,
|
|
"grad_norm": 11.052118241756485,
|
|
"learning_rate": 7.060635125059461e-06,
|
|
"loss": 1.2775708436965942,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 1.2862394957983194,
|
|
"grad_norm": 10.236994214278042,
|
|
"learning_rate": 7.057850316209198e-06,
|
|
"loss": 1.361575722694397,
|
|
"step": 2449
|
|
},
|
|
{
|
|
"epoch": 1.2867647058823528,
|
|
"grad_norm": 19.449345318770636,
|
|
"learning_rate": 7.055064738605134e-06,
|
|
"loss": 1.8090773820877075,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 1.2872899159663866,
|
|
"grad_norm": 9.225946217388557,
|
|
"learning_rate": 7.052278393287884e-06,
|
|
"loss": 2.043266773223877,
|
|
"step": 2451
|
|
},
|
|
{
|
|
"epoch": 1.28781512605042,
|
|
"grad_norm": 8.613599139821474,
|
|
"learning_rate": 7.049491281298342e-06,
|
|
"loss": 1.7184176445007324,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 1.2883403361344539,
|
|
"grad_norm": 11.700284459321495,
|
|
"learning_rate": 7.0467034036776945e-06,
|
|
"loss": 1.412935495376587,
|
|
"step": 2453
|
|
},
|
|
{
|
|
"epoch": 1.2888655462184873,
|
|
"grad_norm": 12.860868786785693,
|
|
"learning_rate": 7.043914761467414e-06,
|
|
"loss": 1.8810560703277588,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 1.289390756302521,
|
|
"grad_norm": 8.95799380669124,
|
|
"learning_rate": 7.041125355709256e-06,
|
|
"loss": 1.2562406063079834,
|
|
"step": 2455
|
|
},
|
|
{
|
|
"epoch": 1.2899159663865547,
|
|
"grad_norm": 10.112064208293942,
|
|
"learning_rate": 7.038335187445263e-06,
|
|
"loss": 1.3138580322265625,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 1.2904411764705883,
|
|
"grad_norm": 13.636094187722334,
|
|
"learning_rate": 7.035544257717761e-06,
|
|
"loss": 1.4140331745147705,
|
|
"step": 2457
|
|
},
|
|
{
|
|
"epoch": 1.290966386554622,
|
|
"grad_norm": 12.628416107837033,
|
|
"learning_rate": 7.032752567569362e-06,
|
|
"loss": 1.4917939901351929,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 1.2914915966386555,
|
|
"grad_norm": 10.456060957087649,
|
|
"learning_rate": 7.0299601180429615e-06,
|
|
"loss": 2.041104793548584,
|
|
"step": 2459
|
|
},
|
|
{
|
|
"epoch": 1.2920168067226891,
|
|
"grad_norm": 15.10892323677275,
|
|
"learning_rate": 7.0271669101817375e-06,
|
|
"loss": 1.2987377643585205,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 1.2925420168067228,
|
|
"grad_norm": 12.641863560360875,
|
|
"learning_rate": 7.024372945029152e-06,
|
|
"loss": 2.1223607063293457,
|
|
"step": 2461
|
|
},
|
|
{
|
|
"epoch": 1.2930672268907564,
|
|
"grad_norm": 9.589315059027086,
|
|
"learning_rate": 7.02157822362895e-06,
|
|
"loss": 1.4910497665405273,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 1.29359243697479,
|
|
"grad_norm": 10.449212079431476,
|
|
"learning_rate": 7.018782747025161e-06,
|
|
"loss": 1.3137577772140503,
|
|
"step": 2463
|
|
},
|
|
{
|
|
"epoch": 1.2941176470588236,
|
|
"grad_norm": 8.853385697952692,
|
|
"learning_rate": 7.015986516262096e-06,
|
|
"loss": 1.8313026428222656,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 1.2946428571428572,
|
|
"grad_norm": 12.65867949209294,
|
|
"learning_rate": 7.013189532384343e-06,
|
|
"loss": 1.743154525756836,
|
|
"step": 2465
|
|
},
|
|
{
|
|
"epoch": 1.2951680672268908,
|
|
"grad_norm": 14.294451207287464,
|
|
"learning_rate": 7.010391796436775e-06,
|
|
"loss": 1.4444868564605713,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 1.2956932773109244,
|
|
"grad_norm": 11.366121595375953,
|
|
"learning_rate": 7.007593309464549e-06,
|
|
"loss": 2.383770227432251,
|
|
"step": 2467
|
|
},
|
|
{
|
|
"epoch": 1.296218487394958,
|
|
"grad_norm": 7.761305998097778,
|
|
"learning_rate": 7.004794072513096e-06,
|
|
"loss": 1.5093591213226318,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 1.2967436974789917,
|
|
"grad_norm": 10.511966100376124,
|
|
"learning_rate": 7.001994086628133e-06,
|
|
"loss": 2.276927947998047,
|
|
"step": 2469
|
|
},
|
|
{
|
|
"epoch": 1.2972689075630253,
|
|
"grad_norm": 11.81309193143502,
|
|
"learning_rate": 6.999193352855652e-06,
|
|
"loss": 2.1074419021606445,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 1.2977941176470589,
|
|
"grad_norm": 7.328549800873724,
|
|
"learning_rate": 6.99639187224193e-06,
|
|
"loss": 1.4727612733840942,
|
|
"step": 2471
|
|
},
|
|
{
|
|
"epoch": 1.2983193277310925,
|
|
"grad_norm": 14.305735034760207,
|
|
"learning_rate": 6.9935896458335176e-06,
|
|
"loss": 1.7596924304962158,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 1.298844537815126,
|
|
"grad_norm": 11.72411346833107,
|
|
"learning_rate": 6.990786674677246e-06,
|
|
"loss": 1.3528380393981934,
|
|
"step": 2473
|
|
},
|
|
{
|
|
"epoch": 1.2993697478991597,
|
|
"grad_norm": 8.637583794808803,
|
|
"learning_rate": 6.987982959820224e-06,
|
|
"loss": 1.59431791305542,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 1.2998949579831933,
|
|
"grad_norm": 10.306180855405167,
|
|
"learning_rate": 6.985178502309842e-06,
|
|
"loss": 1.5949974060058594,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 1.300420168067227,
|
|
"grad_norm": 17.461503333618932,
|
|
"learning_rate": 6.982373303193763e-06,
|
|
"loss": 1.2602121829986572,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 1.3009453781512605,
|
|
"grad_norm": 11.863168941574267,
|
|
"learning_rate": 6.979567363519927e-06,
|
|
"loss": 1.1584863662719727,
|
|
"step": 2477
|
|
},
|
|
{
|
|
"epoch": 1.3014705882352942,
|
|
"grad_norm": 13.700741655029095,
|
|
"learning_rate": 6.976760684336556e-06,
|
|
"loss": 1.492077112197876,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 1.3019957983193278,
|
|
"grad_norm": 10.163214071824699,
|
|
"learning_rate": 6.973953266692143e-06,
|
|
"loss": 1.0241563320159912,
|
|
"step": 2479
|
|
},
|
|
{
|
|
"epoch": 1.3025210084033614,
|
|
"grad_norm": 10.034203329814925,
|
|
"learning_rate": 6.9711451116354576e-06,
|
|
"loss": 1.3957622051239014,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 1.303046218487395,
|
|
"grad_norm": 9.863725755737763,
|
|
"learning_rate": 6.9683362202155465e-06,
|
|
"loss": 1.196930170059204,
|
|
"step": 2481
|
|
},
|
|
{
|
|
"epoch": 1.3035714285714286,
|
|
"grad_norm": 31.49627663522701,
|
|
"learning_rate": 6.965526593481734e-06,
|
|
"loss": 1.653714656829834,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 1.3040966386554622,
|
|
"grad_norm": 11.437529945321943,
|
|
"learning_rate": 6.962716232483612e-06,
|
|
"loss": 1.6412488222122192,
|
|
"step": 2483
|
|
},
|
|
{
|
|
"epoch": 1.3046218487394958,
|
|
"grad_norm": 17.18904918559585,
|
|
"learning_rate": 6.959905138271051e-06,
|
|
"loss": 2.010774612426758,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 1.3051470588235294,
|
|
"grad_norm": 11.86631216395381,
|
|
"learning_rate": 6.957093311894199e-06,
|
|
"loss": 1.3377530574798584,
|
|
"step": 2485
|
|
},
|
|
{
|
|
"epoch": 1.305672268907563,
|
|
"grad_norm": 8.065782433379868,
|
|
"learning_rate": 6.954280754403469e-06,
|
|
"loss": 2.142580032348633,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 1.3061974789915967,
|
|
"grad_norm": 10.93177158102414,
|
|
"learning_rate": 6.951467466849553e-06,
|
|
"loss": 1.1615842580795288,
|
|
"step": 2487
|
|
},
|
|
{
|
|
"epoch": 1.3067226890756303,
|
|
"grad_norm": 8.762552231845937,
|
|
"learning_rate": 6.948653450283416e-06,
|
|
"loss": 2.062228202819824,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 1.3072478991596639,
|
|
"grad_norm": 14.14240358317665,
|
|
"learning_rate": 6.945838705756293e-06,
|
|
"loss": 1.3985791206359863,
|
|
"step": 2489
|
|
},
|
|
{
|
|
"epoch": 1.3077731092436975,
|
|
"grad_norm": 12.192426375474398,
|
|
"learning_rate": 6.943023234319691e-06,
|
|
"loss": 2.175405502319336,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 1.308298319327731,
|
|
"grad_norm": 11.19077682008825,
|
|
"learning_rate": 6.940207037025391e-06,
|
|
"loss": 1.8613638877868652,
|
|
"step": 2491
|
|
},
|
|
{
|
|
"epoch": 1.3088235294117647,
|
|
"grad_norm": 13.661050933922677,
|
|
"learning_rate": 6.93739011492544e-06,
|
|
"loss": 1.8705384731292725,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 1.3093487394957983,
|
|
"grad_norm": 12.848329309525058,
|
|
"learning_rate": 6.934572469072163e-06,
|
|
"loss": 0.7445922493934631,
|
|
"step": 2493
|
|
},
|
|
{
|
|
"epoch": 1.309873949579832,
|
|
"grad_norm": 9.521107438821621,
|
|
"learning_rate": 6.931754100518151e-06,
|
|
"loss": 1.5413544178009033,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 1.3103991596638656,
|
|
"grad_norm": 7.490547274245034,
|
|
"learning_rate": 6.928935010316262e-06,
|
|
"loss": 1.845062494277954,
|
|
"step": 2495
|
|
},
|
|
{
|
|
"epoch": 1.3109243697478992,
|
|
"grad_norm": 11.475722297659969,
|
|
"learning_rate": 6.926115199519632e-06,
|
|
"loss": 1.6455457210540771,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 1.3114495798319328,
|
|
"grad_norm": 11.416749357700317,
|
|
"learning_rate": 6.923294669181659e-06,
|
|
"loss": 1.795639157295227,
|
|
"step": 2497
|
|
},
|
|
{
|
|
"epoch": 1.3119747899159664,
|
|
"grad_norm": 14.580465924989591,
|
|
"learning_rate": 6.920473420356013e-06,
|
|
"loss": 2.1982929706573486,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 1.3125,
|
|
"grad_norm": 10.754106629208545,
|
|
"learning_rate": 6.91765145409663e-06,
|
|
"loss": 1.6893014907836914,
|
|
"step": 2499
|
|
},
|
|
{
|
|
"epoch": 1.3130252100840336,
|
|
"grad_norm": 12.308432564515396,
|
|
"learning_rate": 6.914828771457718e-06,
|
|
"loss": 1.7461330890655518,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 1.3135504201680672,
|
|
"grad_norm": 7.99296736363269,
|
|
"learning_rate": 6.912005373493747e-06,
|
|
"loss": 1.3773590326309204,
|
|
"step": 2501
|
|
},
|
|
{
|
|
"epoch": 1.3140756302521008,
|
|
"grad_norm": 9.43636596519202,
|
|
"learning_rate": 6.909181261259461e-06,
|
|
"loss": 1.7150299549102783,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 1.3146008403361344,
|
|
"grad_norm": 13.615524480762446,
|
|
"learning_rate": 6.9063564358098636e-06,
|
|
"loss": 1.3732833862304688,
|
|
"step": 2503
|
|
},
|
|
{
|
|
"epoch": 1.315126050420168,
|
|
"grad_norm": 8.099324301769936,
|
|
"learning_rate": 6.903530898200231e-06,
|
|
"loss": 1.6331502199172974,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 1.3156512605042017,
|
|
"grad_norm": 12.83455375137105,
|
|
"learning_rate": 6.900704649486103e-06,
|
|
"loss": 1.3890109062194824,
|
|
"step": 2505
|
|
},
|
|
{
|
|
"epoch": 1.3161764705882353,
|
|
"grad_norm": 10.330302981513746,
|
|
"learning_rate": 6.897877690723285e-06,
|
|
"loss": 1.0741055011749268,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 1.316701680672269,
|
|
"grad_norm": 13.608230933694813,
|
|
"learning_rate": 6.895050022967844e-06,
|
|
"loss": 1.4386227130889893,
|
|
"step": 2507
|
|
},
|
|
{
|
|
"epoch": 1.3172268907563025,
|
|
"grad_norm": 11.404381582808693,
|
|
"learning_rate": 6.89222164727612e-06,
|
|
"loss": 2.4623260498046875,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 1.3177521008403361,
|
|
"grad_norm": 9.654563698345797,
|
|
"learning_rate": 6.889392564704712e-06,
|
|
"loss": 1.3226982355117798,
|
|
"step": 2509
|
|
},
|
|
{
|
|
"epoch": 1.3182773109243697,
|
|
"grad_norm": 18.538528952492204,
|
|
"learning_rate": 6.886562776310482e-06,
|
|
"loss": 1.6413676738739014,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 1.3188025210084033,
|
|
"grad_norm": 13.063432822429318,
|
|
"learning_rate": 6.88373228315056e-06,
|
|
"loss": 1.5557506084442139,
|
|
"step": 2511
|
|
},
|
|
{
|
|
"epoch": 1.319327731092437,
|
|
"grad_norm": 11.261364293695788,
|
|
"learning_rate": 6.880901086282337e-06,
|
|
"loss": 2.2341644763946533,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 1.3198529411764706,
|
|
"grad_norm": 11.076696009119743,
|
|
"learning_rate": 6.878069186763466e-06,
|
|
"loss": 1.740747332572937,
|
|
"step": 2513
|
|
},
|
|
{
|
|
"epoch": 1.3203781512605042,
|
|
"grad_norm": 8.168371709889001,
|
|
"learning_rate": 6.8752365856518595e-06,
|
|
"loss": 1.3587056398391724,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 1.3209033613445378,
|
|
"grad_norm": 10.693304800338483,
|
|
"learning_rate": 6.872403284005703e-06,
|
|
"loss": 1.4282145500183105,
|
|
"step": 2515
|
|
},
|
|
{
|
|
"epoch": 1.3214285714285714,
|
|
"grad_norm": 9.20667491126241,
|
|
"learning_rate": 6.869569282883434e-06,
|
|
"loss": 1.3325786590576172,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 1.321953781512605,
|
|
"grad_norm": 11.509424025330327,
|
|
"learning_rate": 6.866734583343753e-06,
|
|
"loss": 1.85734224319458,
|
|
"step": 2517
|
|
},
|
|
{
|
|
"epoch": 1.3224789915966386,
|
|
"grad_norm": 12.590709244370263,
|
|
"learning_rate": 6.8638991864456205e-06,
|
|
"loss": 1.0745584964752197,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 1.3230042016806722,
|
|
"grad_norm": 19.291653056658767,
|
|
"learning_rate": 6.861063093248264e-06,
|
|
"loss": 1.315691351890564,
|
|
"step": 2519
|
|
},
|
|
{
|
|
"epoch": 1.3235294117647058,
|
|
"grad_norm": 11.880771493036994,
|
|
"learning_rate": 6.858226304811163e-06,
|
|
"loss": 1.4071595668792725,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 1.3240546218487395,
|
|
"grad_norm": 9.494184232116408,
|
|
"learning_rate": 6.855388822194061e-06,
|
|
"loss": 1.9811328649520874,
|
|
"step": 2521
|
|
},
|
|
{
|
|
"epoch": 1.324579831932773,
|
|
"grad_norm": 11.820403139060442,
|
|
"learning_rate": 6.852550646456962e-06,
|
|
"loss": 2.430528163909912,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 1.3251050420168067,
|
|
"grad_norm": 8.929151314662786,
|
|
"learning_rate": 6.849711778660124e-06,
|
|
"loss": 1.6251394748687744,
|
|
"step": 2523
|
|
},
|
|
{
|
|
"epoch": 1.3256302521008403,
|
|
"grad_norm": 19.98342407839324,
|
|
"learning_rate": 6.846872219864068e-06,
|
|
"loss": 1.5677638053894043,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 1.326155462184874,
|
|
"grad_norm": 13.528596231737234,
|
|
"learning_rate": 6.844031971129571e-06,
|
|
"loss": 1.9841523170471191,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 1.3266806722689075,
|
|
"grad_norm": 24.438549163870544,
|
|
"learning_rate": 6.84119103351767e-06,
|
|
"loss": 1.4829826354980469,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 1.3272058823529411,
|
|
"grad_norm": 18.612654802549784,
|
|
"learning_rate": 6.8383494080896575e-06,
|
|
"loss": 1.2201976776123047,
|
|
"step": 2527
|
|
},
|
|
{
|
|
"epoch": 1.3277310924369747,
|
|
"grad_norm": 11.683062185750027,
|
|
"learning_rate": 6.835507095907082e-06,
|
|
"loss": 1.965669870376587,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 1.3282563025210083,
|
|
"grad_norm": 12.42980937815667,
|
|
"learning_rate": 6.8326640980317475e-06,
|
|
"loss": 1.4965461492538452,
|
|
"step": 2529
|
|
},
|
|
{
|
|
"epoch": 1.328781512605042,
|
|
"grad_norm": 8.660389755626476,
|
|
"learning_rate": 6.829820415525721e-06,
|
|
"loss": 1.406752347946167,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 1.3293067226890756,
|
|
"grad_norm": 12.00426067443169,
|
|
"learning_rate": 6.8269760494513185e-06,
|
|
"loss": 1.608267068862915,
|
|
"step": 2531
|
|
},
|
|
{
|
|
"epoch": 1.3298319327731092,
|
|
"grad_norm": 11.123005964951068,
|
|
"learning_rate": 6.824131000871113e-06,
|
|
"loss": 1.803060531616211,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 1.3303571428571428,
|
|
"grad_norm": 10.273374497276253,
|
|
"learning_rate": 6.821285270847934e-06,
|
|
"loss": 0.8821080923080444,
|
|
"step": 2533
|
|
},
|
|
{
|
|
"epoch": 1.3308823529411764,
|
|
"grad_norm": 11.7024536004965,
|
|
"learning_rate": 6.818438860444865e-06,
|
|
"loss": 1.0945472717285156,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 1.33140756302521,
|
|
"grad_norm": 8.949617329870396,
|
|
"learning_rate": 6.815591770725241e-06,
|
|
"loss": 1.6190129518508911,
|
|
"step": 2535
|
|
},
|
|
{
|
|
"epoch": 1.3319327731092436,
|
|
"grad_norm": 10.707926587792425,
|
|
"learning_rate": 6.812744002752653e-06,
|
|
"loss": 1.4689741134643555,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 1.3324579831932772,
|
|
"grad_norm": 12.086019162302449,
|
|
"learning_rate": 6.80989555759095e-06,
|
|
"loss": 1.2574225664138794,
|
|
"step": 2537
|
|
},
|
|
{
|
|
"epoch": 1.3329831932773109,
|
|
"grad_norm": 9.606623699042247,
|
|
"learning_rate": 6.807046436304224e-06,
|
|
"loss": 1.4694947004318237,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 1.3335084033613445,
|
|
"grad_norm": 13.763373638672054,
|
|
"learning_rate": 6.804196639956828e-06,
|
|
"loss": 1.2479515075683594,
|
|
"step": 2539
|
|
},
|
|
{
|
|
"epoch": 1.334033613445378,
|
|
"grad_norm": 15.701453600680292,
|
|
"learning_rate": 6.801346169613361e-06,
|
|
"loss": 1.2505472898483276,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 1.3345588235294117,
|
|
"grad_norm": 7.851971342183067,
|
|
"learning_rate": 6.79849502633868e-06,
|
|
"loss": 1.3575226068496704,
|
|
"step": 2541
|
|
},
|
|
{
|
|
"epoch": 1.3350840336134453,
|
|
"grad_norm": 12.599257053527712,
|
|
"learning_rate": 6.79564321119789e-06,
|
|
"loss": 2.1606597900390625,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 1.335609243697479,
|
|
"grad_norm": 12.879209720966696,
|
|
"learning_rate": 6.792790725256347e-06,
|
|
"loss": 1.8259855508804321,
|
|
"step": 2543
|
|
},
|
|
{
|
|
"epoch": 1.3361344537815127,
|
|
"grad_norm": 16.980649451686435,
|
|
"learning_rate": 6.7899375695796545e-06,
|
|
"loss": 1.5110688209533691,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 1.3366596638655461,
|
|
"grad_norm": 9.898623798622163,
|
|
"learning_rate": 6.787083745233674e-06,
|
|
"loss": 1.8591792583465576,
|
|
"step": 2545
|
|
},
|
|
{
|
|
"epoch": 1.33718487394958,
|
|
"grad_norm": 11.167484827774937,
|
|
"learning_rate": 6.784229253284511e-06,
|
|
"loss": 2.2330236434936523,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 1.3377100840336134,
|
|
"grad_norm": 13.873008847757054,
|
|
"learning_rate": 6.781374094798522e-06,
|
|
"loss": 2.464442491531372,
|
|
"step": 2547
|
|
},
|
|
{
|
|
"epoch": 1.3382352941176472,
|
|
"grad_norm": 10.521202124826498,
|
|
"learning_rate": 6.77851827084231e-06,
|
|
"loss": 1.4322118759155273,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 1.3387605042016806,
|
|
"grad_norm": 10.514189793460767,
|
|
"learning_rate": 6.775661782482732e-06,
|
|
"loss": 1.3177762031555176,
|
|
"step": 2549
|
|
},
|
|
{
|
|
"epoch": 1.3392857142857144,
|
|
"grad_norm": 9.46733935917788,
|
|
"learning_rate": 6.7728046307868875e-06,
|
|
"loss": 1.9117001295089722,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 1.3398109243697478,
|
|
"grad_norm": 11.983795556008838,
|
|
"learning_rate": 6.769946816822128e-06,
|
|
"loss": 1.4520769119262695,
|
|
"step": 2551
|
|
},
|
|
{
|
|
"epoch": 1.3403361344537816,
|
|
"grad_norm": 11.959673616566686,
|
|
"learning_rate": 6.767088341656051e-06,
|
|
"loss": 1.7856323719024658,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 1.340861344537815,
|
|
"grad_norm": 15.4263714940938,
|
|
"learning_rate": 6.764229206356498e-06,
|
|
"loss": 1.325421690940857,
|
|
"step": 2553
|
|
},
|
|
{
|
|
"epoch": 1.3413865546218489,
|
|
"grad_norm": 14.404989650969412,
|
|
"learning_rate": 6.761369411991564e-06,
|
|
"loss": 1.808119535446167,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 1.3419117647058822,
|
|
"grad_norm": 10.145814591441445,
|
|
"learning_rate": 6.7585089596295815e-06,
|
|
"loss": 1.6972968578338623,
|
|
"step": 2555
|
|
},
|
|
{
|
|
"epoch": 1.342436974789916,
|
|
"grad_norm": 13.800616055779992,
|
|
"learning_rate": 6.7556478503391375e-06,
|
|
"loss": 1.389564037322998,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 1.3429621848739495,
|
|
"grad_norm": 23.327558473709566,
|
|
"learning_rate": 6.752786085189059e-06,
|
|
"loss": 1.3491129875183105,
|
|
"step": 2557
|
|
},
|
|
{
|
|
"epoch": 1.3434873949579833,
|
|
"grad_norm": 8.807767000944555,
|
|
"learning_rate": 6.749923665248419e-06,
|
|
"loss": 0.8782278895378113,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 1.3440126050420167,
|
|
"grad_norm": 9.039579718890321,
|
|
"learning_rate": 6.747060591586533e-06,
|
|
"loss": 2.1642870903015137,
|
|
"step": 2559
|
|
},
|
|
{
|
|
"epoch": 1.3445378151260505,
|
|
"grad_norm": 8.52660806934076,
|
|
"learning_rate": 6.744196865272967e-06,
|
|
"loss": 1.5673531293869019,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 1.345063025210084,
|
|
"grad_norm": 11.06218019437839,
|
|
"learning_rate": 6.741332487377525e-06,
|
|
"loss": 1.4878621101379395,
|
|
"step": 2561
|
|
},
|
|
{
|
|
"epoch": 1.3455882352941178,
|
|
"grad_norm": 12.440956245310376,
|
|
"learning_rate": 6.738467458970257e-06,
|
|
"loss": 1.1033461093902588,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 1.3461134453781511,
|
|
"grad_norm": 15.710698731020198,
|
|
"learning_rate": 6.735601781121454e-06,
|
|
"loss": 1.962068796157837,
|
|
"step": 2563
|
|
},
|
|
{
|
|
"epoch": 1.346638655462185,
|
|
"grad_norm": 9.55682423903587,
|
|
"learning_rate": 6.732735454901655e-06,
|
|
"loss": 1.9984880685806274,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 1.3471638655462184,
|
|
"grad_norm": 8.30414066844809,
|
|
"learning_rate": 6.729868481381632e-06,
|
|
"loss": 1.3165334463119507,
|
|
"step": 2565
|
|
},
|
|
{
|
|
"epoch": 1.3476890756302522,
|
|
"grad_norm": 14.592880402511963,
|
|
"learning_rate": 6.727000861632406e-06,
|
|
"loss": 1.8011142015457153,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 1.3482142857142856,
|
|
"grad_norm": 12.499109422383643,
|
|
"learning_rate": 6.724132596725237e-06,
|
|
"loss": 2.018202304840088,
|
|
"step": 2567
|
|
},
|
|
{
|
|
"epoch": 1.3487394957983194,
|
|
"grad_norm": 14.192070130331215,
|
|
"learning_rate": 6.7212636877316285e-06,
|
|
"loss": 1.5367764234542847,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 1.3492647058823528,
|
|
"grad_norm": 15.339220046068164,
|
|
"learning_rate": 6.718394135723321e-06,
|
|
"loss": 1.013871192932129,
|
|
"step": 2569
|
|
},
|
|
{
|
|
"epoch": 1.3497899159663866,
|
|
"grad_norm": 8.918308112649619,
|
|
"learning_rate": 6.7155239417722965e-06,
|
|
"loss": 1.7312051057815552,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 1.35031512605042,
|
|
"grad_norm": 10.220664703279132,
|
|
"learning_rate": 6.712653106950778e-06,
|
|
"loss": 1.2396622896194458,
|
|
"step": 2571
|
|
},
|
|
{
|
|
"epoch": 1.3508403361344539,
|
|
"grad_norm": 12.729365501764047,
|
|
"learning_rate": 6.709781632331225e-06,
|
|
"loss": 1.614361047744751,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 1.3513655462184873,
|
|
"grad_norm": 8.795237921642682,
|
|
"learning_rate": 6.706909518986341e-06,
|
|
"loss": 1.6548465490341187,
|
|
"step": 2573
|
|
},
|
|
{
|
|
"epoch": 1.351890756302521,
|
|
"grad_norm": 11.77075807534768,
|
|
"learning_rate": 6.7040367679890615e-06,
|
|
"loss": 1.3578767776489258,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 1.3524159663865547,
|
|
"grad_norm": 8.822220085139815,
|
|
"learning_rate": 6.701163380412568e-06,
|
|
"loss": 1.4953851699829102,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 1.3529411764705883,
|
|
"grad_norm": 7.840622770182397,
|
|
"learning_rate": 6.698289357330272e-06,
|
|
"loss": 1.3457810878753662,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 1.353466386554622,
|
|
"grad_norm": 9.702436161519671,
|
|
"learning_rate": 6.695414699815828e-06,
|
|
"loss": 1.5230767726898193,
|
|
"step": 2577
|
|
},
|
|
{
|
|
"epoch": 1.3539915966386555,
|
|
"grad_norm": 10.397147343129433,
|
|
"learning_rate": 6.692539408943124e-06,
|
|
"loss": 1.2239813804626465,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 1.3545168067226891,
|
|
"grad_norm": 13.13970364203173,
|
|
"learning_rate": 6.689663485786287e-06,
|
|
"loss": 1.343689203262329,
|
|
"step": 2579
|
|
},
|
|
{
|
|
"epoch": 1.3550420168067228,
|
|
"grad_norm": 18.493747579673524,
|
|
"learning_rate": 6.686786931419681e-06,
|
|
"loss": 1.5230176448822021,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 1.3555672268907564,
|
|
"grad_norm": 10.334524274305997,
|
|
"learning_rate": 6.6839097469179e-06,
|
|
"loss": 1.6321138143539429,
|
|
"step": 2581
|
|
},
|
|
{
|
|
"epoch": 1.35609243697479,
|
|
"grad_norm": 16.73477207146799,
|
|
"learning_rate": 6.6810319333557815e-06,
|
|
"loss": 0.9947364330291748,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 1.3566176470588236,
|
|
"grad_norm": 11.768404186451034,
|
|
"learning_rate": 6.678153491808394e-06,
|
|
"loss": 1.2328753471374512,
|
|
"step": 2583
|
|
},
|
|
{
|
|
"epoch": 1.3571428571428572,
|
|
"grad_norm": 18.240083270794724,
|
|
"learning_rate": 6.675274423351037e-06,
|
|
"loss": 1.6512658596038818,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 1.3576680672268908,
|
|
"grad_norm": 10.917484463702253,
|
|
"learning_rate": 6.6723947290592505e-06,
|
|
"loss": 1.6520872116088867,
|
|
"step": 2585
|
|
},
|
|
{
|
|
"epoch": 1.3581932773109244,
|
|
"grad_norm": 10.929865099541153,
|
|
"learning_rate": 6.669514410008806e-06,
|
|
"loss": 2.4195618629455566,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 1.358718487394958,
|
|
"grad_norm": 9.344452389539134,
|
|
"learning_rate": 6.666633467275706e-06,
|
|
"loss": 1.4502514600753784,
|
|
"step": 2587
|
|
},
|
|
{
|
|
"epoch": 1.3592436974789917,
|
|
"grad_norm": 12.74859647104274,
|
|
"learning_rate": 6.6637519019361895e-06,
|
|
"loss": 2.389765977859497,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 1.3597689075630253,
|
|
"grad_norm": 13.963940493816377,
|
|
"learning_rate": 6.660869715066725e-06,
|
|
"loss": 1.0900962352752686,
|
|
"step": 2589
|
|
},
|
|
{
|
|
"epoch": 1.3602941176470589,
|
|
"grad_norm": 11.492524116734717,
|
|
"learning_rate": 6.657986907744018e-06,
|
|
"loss": 2.2712888717651367,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 1.3608193277310925,
|
|
"grad_norm": 8.71014440315091,
|
|
"learning_rate": 6.655103481045e-06,
|
|
"loss": 1.4768775701522827,
|
|
"step": 2591
|
|
},
|
|
{
|
|
"epoch": 1.361344537815126,
|
|
"grad_norm": 12.506118340174757,
|
|
"learning_rate": 6.652219436046836e-06,
|
|
"loss": 1.9261109828948975,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 1.3618697478991597,
|
|
"grad_norm": 13.911280033325387,
|
|
"learning_rate": 6.649334773826924e-06,
|
|
"loss": 2.072033405303955,
|
|
"step": 2593
|
|
},
|
|
{
|
|
"epoch": 1.3623949579831933,
|
|
"grad_norm": 13.4623849741064,
|
|
"learning_rate": 6.646449495462891e-06,
|
|
"loss": 1.2910997867584229,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 1.362920168067227,
|
|
"grad_norm": 13.046448031093849,
|
|
"learning_rate": 6.643563602032593e-06,
|
|
"loss": 1.1936817169189453,
|
|
"step": 2595
|
|
},
|
|
{
|
|
"epoch": 1.3634453781512605,
|
|
"grad_norm": 8.811381722565676,
|
|
"learning_rate": 6.640677094614117e-06,
|
|
"loss": 2.49958872795105,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 1.3639705882352942,
|
|
"grad_norm": 11.652098257259762,
|
|
"learning_rate": 6.63778997428578e-06,
|
|
"loss": 2.2749969959259033,
|
|
"step": 2597
|
|
},
|
|
{
|
|
"epoch": 1.3644957983193278,
|
|
"grad_norm": 17.82890109769083,
|
|
"learning_rate": 6.6349022421261275e-06,
|
|
"loss": 1.549394965171814,
|
|
"step": 2598
|
|
},
|
|
{
|
|
"epoch": 1.3650210084033614,
|
|
"grad_norm": 19.38828160727182,
|
|
"learning_rate": 6.632013899213934e-06,
|
|
"loss": 1.3625071048736572,
|
|
"step": 2599
|
|
},
|
|
{
|
|
"epoch": 1.365546218487395,
|
|
"grad_norm": 9.68278057120787,
|
|
"learning_rate": 6.629124946628198e-06,
|
|
"loss": 1.88570237159729,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 1.3660714285714286,
|
|
"grad_norm": 9.485250698838614,
|
|
"learning_rate": 6.626235385448152e-06,
|
|
"loss": 1.4110757112503052,
|
|
"step": 2601
|
|
},
|
|
{
|
|
"epoch": 1.3665966386554622,
|
|
"grad_norm": 10.356454455125625,
|
|
"learning_rate": 6.623345216753254e-06,
|
|
"loss": 1.019836187362671,
|
|
"step": 2602
|
|
},
|
|
{
|
|
"epoch": 1.3671218487394958,
|
|
"grad_norm": 9.839943031800415,
|
|
"learning_rate": 6.6204544416231865e-06,
|
|
"loss": 1.557316541671753,
|
|
"step": 2603
|
|
},
|
|
{
|
|
"epoch": 1.3676470588235294,
|
|
"grad_norm": 9.516374842795816,
|
|
"learning_rate": 6.617563061137859e-06,
|
|
"loss": 1.1713138818740845,
|
|
"step": 2604
|
|
},
|
|
{
|
|
"epoch": 1.368172268907563,
|
|
"grad_norm": 8.888276381665476,
|
|
"learning_rate": 6.61467107637741e-06,
|
|
"loss": 1.6227775812149048,
|
|
"step": 2605
|
|
},
|
|
{
|
|
"epoch": 1.3686974789915967,
|
|
"grad_norm": 9.936072019817864,
|
|
"learning_rate": 6.611778488422203e-06,
|
|
"loss": 1.488483190536499,
|
|
"step": 2606
|
|
},
|
|
{
|
|
"epoch": 1.3692226890756303,
|
|
"grad_norm": 14.394976903542132,
|
|
"learning_rate": 6.608885298352823e-06,
|
|
"loss": 1.5318236351013184,
|
|
"step": 2607
|
|
},
|
|
{
|
|
"epoch": 1.3697478991596639,
|
|
"grad_norm": 7.2118829005676455,
|
|
"learning_rate": 6.6059915072500845e-06,
|
|
"loss": 1.4667561054229736,
|
|
"step": 2608
|
|
},
|
|
{
|
|
"epoch": 1.3702731092436975,
|
|
"grad_norm": 19.7245976124542,
|
|
"learning_rate": 6.603097116195026e-06,
|
|
"loss": 1.724409580230713,
|
|
"step": 2609
|
|
},
|
|
{
|
|
"epoch": 1.370798319327731,
|
|
"grad_norm": 16.784771707501868,
|
|
"learning_rate": 6.600202126268905e-06,
|
|
"loss": 2.025880813598633,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 1.3713235294117647,
|
|
"grad_norm": 11.661960217787538,
|
|
"learning_rate": 6.59730653855321e-06,
|
|
"loss": 1.8906800746917725,
|
|
"step": 2611
|
|
},
|
|
{
|
|
"epoch": 1.3718487394957983,
|
|
"grad_norm": 13.24328380174217,
|
|
"learning_rate": 6.5944103541296486e-06,
|
|
"loss": 1.6317015886306763,
|
|
"step": 2612
|
|
},
|
|
{
|
|
"epoch": 1.372373949579832,
|
|
"grad_norm": 15.173657359422222,
|
|
"learning_rate": 6.591513574080152e-06,
|
|
"loss": 1.6432816982269287,
|
|
"step": 2613
|
|
},
|
|
{
|
|
"epoch": 1.3728991596638656,
|
|
"grad_norm": 16.114639890649695,
|
|
"learning_rate": 6.5886161994868744e-06,
|
|
"loss": 1.4184069633483887,
|
|
"step": 2614
|
|
},
|
|
{
|
|
"epoch": 1.3734243697478992,
|
|
"grad_norm": 16.52273933884079,
|
|
"learning_rate": 6.58571823143219e-06,
|
|
"loss": 1.7298986911773682,
|
|
"step": 2615
|
|
},
|
|
{
|
|
"epoch": 1.3739495798319328,
|
|
"grad_norm": 11.782970640317508,
|
|
"learning_rate": 6.582819670998699e-06,
|
|
"loss": 1.436945915222168,
|
|
"step": 2616
|
|
},
|
|
{
|
|
"epoch": 1.3744747899159664,
|
|
"grad_norm": 12.52398633251897,
|
|
"learning_rate": 6.579920519269218e-06,
|
|
"loss": 1.3968048095703125,
|
|
"step": 2617
|
|
},
|
|
{
|
|
"epoch": 1.375,
|
|
"grad_norm": 10.295271294768517,
|
|
"learning_rate": 6.577020777326789e-06,
|
|
"loss": 1.237300992012024,
|
|
"step": 2618
|
|
},
|
|
{
|
|
"epoch": 1.3755252100840336,
|
|
"grad_norm": 12.049889074833844,
|
|
"learning_rate": 6.574120446254672e-06,
|
|
"loss": 1.6429824829101562,
|
|
"step": 2619
|
|
},
|
|
{
|
|
"epoch": 1.3760504201680672,
|
|
"grad_norm": 7.164282559855988,
|
|
"learning_rate": 6.571219527136347e-06,
|
|
"loss": 1.7228885889053345,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 1.3765756302521008,
|
|
"grad_norm": 11.272822828941221,
|
|
"learning_rate": 6.568318021055512e-06,
|
|
"loss": 1.6949775218963623,
|
|
"step": 2621
|
|
},
|
|
{
|
|
"epoch": 1.3771008403361344,
|
|
"grad_norm": 10.272152303244857,
|
|
"learning_rate": 6.5654159290960895e-06,
|
|
"loss": 1.272033929824829,
|
|
"step": 2622
|
|
},
|
|
{
|
|
"epoch": 1.377626050420168,
|
|
"grad_norm": 12.10455057830705,
|
|
"learning_rate": 6.562513252342216e-06,
|
|
"loss": 1.2234035730361938,
|
|
"step": 2623
|
|
},
|
|
{
|
|
"epoch": 1.3781512605042017,
|
|
"grad_norm": 9.343131832171544,
|
|
"learning_rate": 6.55960999187825e-06,
|
|
"loss": 1.1954748630523682,
|
|
"step": 2624
|
|
},
|
|
{
|
|
"epoch": 1.3786764705882353,
|
|
"grad_norm": 8.224273647016544,
|
|
"learning_rate": 6.556706148788765e-06,
|
|
"loss": 1.7882004976272583,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 1.379201680672269,
|
|
"grad_norm": 10.40029164081238,
|
|
"learning_rate": 6.553801724158552e-06,
|
|
"loss": 1.4875035285949707,
|
|
"step": 2626
|
|
},
|
|
{
|
|
"epoch": 1.3797268907563025,
|
|
"grad_norm": 10.655318633990067,
|
|
"learning_rate": 6.550896719072624e-06,
|
|
"loss": 1.8149640560150146,
|
|
"step": 2627
|
|
},
|
|
{
|
|
"epoch": 1.3802521008403361,
|
|
"grad_norm": 13.177774614503326,
|
|
"learning_rate": 6.547991134616204e-06,
|
|
"loss": 1.6386281251907349,
|
|
"step": 2628
|
|
},
|
|
{
|
|
"epoch": 1.3807773109243697,
|
|
"grad_norm": 7.566280409389933,
|
|
"learning_rate": 6.545084971874738e-06,
|
|
"loss": 2.0819602012634277,
|
|
"step": 2629
|
|
},
|
|
{
|
|
"epoch": 1.3813025210084033,
|
|
"grad_norm": 9.321219115057831,
|
|
"learning_rate": 6.542178231933882e-06,
|
|
"loss": 1.232421875,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 1.381827731092437,
|
|
"grad_norm": 14.479870399116498,
|
|
"learning_rate": 6.539270915879513e-06,
|
|
"loss": 3.0914225578308105,
|
|
"step": 2631
|
|
},
|
|
{
|
|
"epoch": 1.3823529411764706,
|
|
"grad_norm": 13.387208054414357,
|
|
"learning_rate": 6.536363024797721e-06,
|
|
"loss": 1.8260456323623657,
|
|
"step": 2632
|
|
},
|
|
{
|
|
"epoch": 1.3828781512605042,
|
|
"grad_norm": 9.62056668563731,
|
|
"learning_rate": 6.5334545597748075e-06,
|
|
"loss": 1.3626213073730469,
|
|
"step": 2633
|
|
},
|
|
{
|
|
"epoch": 1.3834033613445378,
|
|
"grad_norm": 12.191952993848863,
|
|
"learning_rate": 6.530545521897293e-06,
|
|
"loss": 1.6068592071533203,
|
|
"step": 2634
|
|
},
|
|
{
|
|
"epoch": 1.3839285714285714,
|
|
"grad_norm": 10.70208601001819,
|
|
"learning_rate": 6.527635912251914e-06,
|
|
"loss": 1.9337867498397827,
|
|
"step": 2635
|
|
},
|
|
{
|
|
"epoch": 1.384453781512605,
|
|
"grad_norm": 14.753455181001744,
|
|
"learning_rate": 6.524725731925613e-06,
|
|
"loss": 1.1043397188186646,
|
|
"step": 2636
|
|
},
|
|
{
|
|
"epoch": 1.3849789915966386,
|
|
"grad_norm": 10.501587226416273,
|
|
"learning_rate": 6.521814982005552e-06,
|
|
"loss": 1.173478126525879,
|
|
"step": 2637
|
|
},
|
|
{
|
|
"epoch": 1.3855042016806722,
|
|
"grad_norm": 7.756842605929269,
|
|
"learning_rate": 6.5189036635791e-06,
|
|
"loss": 0.8882420659065247,
|
|
"step": 2638
|
|
},
|
|
{
|
|
"epoch": 1.3860294117647058,
|
|
"grad_norm": 10.626611100897398,
|
|
"learning_rate": 6.5159917777338466e-06,
|
|
"loss": 1.5061715841293335,
|
|
"step": 2639
|
|
},
|
|
{
|
|
"epoch": 1.3865546218487395,
|
|
"grad_norm": 8.048004334552857,
|
|
"learning_rate": 6.513079325557587e-06,
|
|
"loss": 2.0927653312683105,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 1.387079831932773,
|
|
"grad_norm": 11.249220854343857,
|
|
"learning_rate": 6.510166308138328e-06,
|
|
"loss": 0.9584404826164246,
|
|
"step": 2641
|
|
},
|
|
{
|
|
"epoch": 1.3876050420168067,
|
|
"grad_norm": 11.998451384517095,
|
|
"learning_rate": 6.507252726564293e-06,
|
|
"loss": 1.6279972791671753,
|
|
"step": 2642
|
|
},
|
|
{
|
|
"epoch": 1.3881302521008403,
|
|
"grad_norm": 18.75109577628921,
|
|
"learning_rate": 6.5043385819239095e-06,
|
|
"loss": 1.6063534021377563,
|
|
"step": 2643
|
|
},
|
|
{
|
|
"epoch": 1.388655462184874,
|
|
"grad_norm": 11.257558081059175,
|
|
"learning_rate": 6.501423875305819e-06,
|
|
"loss": 1.3692386150360107,
|
|
"step": 2644
|
|
},
|
|
{
|
|
"epoch": 1.3891806722689075,
|
|
"grad_norm": 12.677813614247667,
|
|
"learning_rate": 6.498508607798872e-06,
|
|
"loss": 1.5299866199493408,
|
|
"step": 2645
|
|
},
|
|
{
|
|
"epoch": 1.3897058823529411,
|
|
"grad_norm": 8.320974312397558,
|
|
"learning_rate": 6.4955927804921284e-06,
|
|
"loss": 2.0422544479370117,
|
|
"step": 2646
|
|
},
|
|
{
|
|
"epoch": 1.3902310924369747,
|
|
"grad_norm": 7.7916598720904755,
|
|
"learning_rate": 6.49267639447486e-06,
|
|
"loss": 1.7703297138214111,
|
|
"step": 2647
|
|
},
|
|
{
|
|
"epoch": 1.3907563025210083,
|
|
"grad_norm": 14.479568830284776,
|
|
"learning_rate": 6.489759450836541e-06,
|
|
"loss": 2.3879706859588623,
|
|
"step": 2648
|
|
},
|
|
{
|
|
"epoch": 1.391281512605042,
|
|
"grad_norm": 6.298967670520775,
|
|
"learning_rate": 6.48684195066686e-06,
|
|
"loss": 1.664591908454895,
|
|
"step": 2649
|
|
},
|
|
{
|
|
"epoch": 1.3918067226890756,
|
|
"grad_norm": 19.64520198257997,
|
|
"learning_rate": 6.483923895055713e-06,
|
|
"loss": 1.2596275806427002,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 1.3923319327731092,
|
|
"grad_norm": 10.252645986693791,
|
|
"learning_rate": 6.481005285093199e-06,
|
|
"loss": 1.020234227180481,
|
|
"step": 2651
|
|
},
|
|
{
|
|
"epoch": 1.3928571428571428,
|
|
"grad_norm": 9.145891761994292,
|
|
"learning_rate": 6.4780861218696265e-06,
|
|
"loss": 2.633622407913208,
|
|
"step": 2652
|
|
},
|
|
{
|
|
"epoch": 1.3933823529411764,
|
|
"grad_norm": 9.158351197360872,
|
|
"learning_rate": 6.475166406475515e-06,
|
|
"loss": 1.233659267425537,
|
|
"step": 2653
|
|
},
|
|
{
|
|
"epoch": 1.39390756302521,
|
|
"grad_norm": 8.32455042133505,
|
|
"learning_rate": 6.472246140001582e-06,
|
|
"loss": 2.1047203540802,
|
|
"step": 2654
|
|
},
|
|
{
|
|
"epoch": 1.3944327731092436,
|
|
"grad_norm": 12.136926307616424,
|
|
"learning_rate": 6.4693253235387575e-06,
|
|
"loss": 1.2161425352096558,
|
|
"step": 2655
|
|
},
|
|
{
|
|
"epoch": 1.3949579831932772,
|
|
"grad_norm": 16.27085468344808,
|
|
"learning_rate": 6.466403958178175e-06,
|
|
"loss": 1.779348373413086,
|
|
"step": 2656
|
|
},
|
|
{
|
|
"epoch": 1.3954831932773109,
|
|
"grad_norm": 11.158250460715964,
|
|
"learning_rate": 6.4634820450111715e-06,
|
|
"loss": 1.38726007938385,
|
|
"step": 2657
|
|
},
|
|
{
|
|
"epoch": 1.3960084033613445,
|
|
"grad_norm": 11.611274022507967,
|
|
"learning_rate": 6.460559585129289e-06,
|
|
"loss": 1.1857019662857056,
|
|
"step": 2658
|
|
},
|
|
{
|
|
"epoch": 1.396533613445378,
|
|
"grad_norm": 15.148118324524411,
|
|
"learning_rate": 6.457636579624278e-06,
|
|
"loss": 1.4580409526824951,
|
|
"step": 2659
|
|
},
|
|
{
|
|
"epoch": 1.3970588235294117,
|
|
"grad_norm": 9.052838688025497,
|
|
"learning_rate": 6.454713029588086e-06,
|
|
"loss": 1.095137357711792,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 1.3975840336134453,
|
|
"grad_norm": 10.358754953892628,
|
|
"learning_rate": 6.451788936112868e-06,
|
|
"loss": 1.8477425575256348,
|
|
"step": 2661
|
|
},
|
|
{
|
|
"epoch": 1.398109243697479,
|
|
"grad_norm": 32.799345529210306,
|
|
"learning_rate": 6.4488643002909845e-06,
|
|
"loss": 2.163045883178711,
|
|
"step": 2662
|
|
},
|
|
{
|
|
"epoch": 1.3986344537815127,
|
|
"grad_norm": 6.803439693193377,
|
|
"learning_rate": 6.445939123214991e-06,
|
|
"loss": 1.712918758392334,
|
|
"step": 2663
|
|
},
|
|
{
|
|
"epoch": 1.3991596638655461,
|
|
"grad_norm": 15.808125169176924,
|
|
"learning_rate": 6.443013405977652e-06,
|
|
"loss": 1.7482061386108398,
|
|
"step": 2664
|
|
},
|
|
{
|
|
"epoch": 1.39968487394958,
|
|
"grad_norm": 13.589295403290127,
|
|
"learning_rate": 6.440087149671932e-06,
|
|
"loss": 1.5376638174057007,
|
|
"step": 2665
|
|
},
|
|
{
|
|
"epoch": 1.4002100840336134,
|
|
"grad_norm": 13.139163850847654,
|
|
"learning_rate": 6.437160355390997e-06,
|
|
"loss": 1.6581084728240967,
|
|
"step": 2666
|
|
},
|
|
{
|
|
"epoch": 1.4007352941176472,
|
|
"grad_norm": 8.262781614669443,
|
|
"learning_rate": 6.434233024228209e-06,
|
|
"loss": 1.416346549987793,
|
|
"step": 2667
|
|
},
|
|
{
|
|
"epoch": 1.4012605042016806,
|
|
"grad_norm": 14.855034754990058,
|
|
"learning_rate": 6.431305157277139e-06,
|
|
"loss": 1.7294692993164062,
|
|
"step": 2668
|
|
},
|
|
{
|
|
"epoch": 1.4017857142857144,
|
|
"grad_norm": 7.980525829144439,
|
|
"learning_rate": 6.428376755631553e-06,
|
|
"loss": 1.3376318216323853,
|
|
"step": 2669
|
|
},
|
|
{
|
|
"epoch": 1.4023109243697478,
|
|
"grad_norm": 9.111471869390265,
|
|
"learning_rate": 6.4254478203854175e-06,
|
|
"loss": 1.1048033237457275,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 1.4028361344537816,
|
|
"grad_norm": 8.633811759504914,
|
|
"learning_rate": 6.422518352632898e-06,
|
|
"loss": 1.1107443571090698,
|
|
"step": 2671
|
|
},
|
|
{
|
|
"epoch": 1.403361344537815,
|
|
"grad_norm": 9.614913628192502,
|
|
"learning_rate": 6.419588353468361e-06,
|
|
"loss": 1.5569854974746704,
|
|
"step": 2672
|
|
},
|
|
{
|
|
"epoch": 1.4038865546218489,
|
|
"grad_norm": 14.525560523011887,
|
|
"learning_rate": 6.41665782398637e-06,
|
|
"loss": 1.8844988346099854,
|
|
"step": 2673
|
|
},
|
|
{
|
|
"epoch": 1.4044117647058822,
|
|
"grad_norm": 13.854399534689552,
|
|
"learning_rate": 6.413726765281685e-06,
|
|
"loss": 1.430602788925171,
|
|
"step": 2674
|
|
},
|
|
{
|
|
"epoch": 1.404936974789916,
|
|
"grad_norm": 10.373295258274258,
|
|
"learning_rate": 6.410795178449266e-06,
|
|
"loss": 0.9985050559043884,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 1.4054621848739495,
|
|
"grad_norm": 10.760439519982976,
|
|
"learning_rate": 6.407863064584271e-06,
|
|
"loss": 1.928354024887085,
|
|
"step": 2676
|
|
},
|
|
{
|
|
"epoch": 1.4059873949579833,
|
|
"grad_norm": 12.748172047930892,
|
|
"learning_rate": 6.404930424782052e-06,
|
|
"loss": 1.3580679893493652,
|
|
"step": 2677
|
|
},
|
|
{
|
|
"epoch": 1.4065126050420167,
|
|
"grad_norm": 7.350157703996195,
|
|
"learning_rate": 6.40199726013816e-06,
|
|
"loss": 1.6785142421722412,
|
|
"step": 2678
|
|
},
|
|
{
|
|
"epoch": 1.4070378151260505,
|
|
"grad_norm": 10.02657720960701,
|
|
"learning_rate": 6.3990635717483404e-06,
|
|
"loss": 1.535596489906311,
|
|
"step": 2679
|
|
},
|
|
{
|
|
"epoch": 1.407563025210084,
|
|
"grad_norm": 9.162115309527774,
|
|
"learning_rate": 6.396129360708537e-06,
|
|
"loss": 1.3276948928833008,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 1.4080882352941178,
|
|
"grad_norm": 8.401950811227174,
|
|
"learning_rate": 6.393194628114885e-06,
|
|
"loss": 1.2496113777160645,
|
|
"step": 2681
|
|
},
|
|
{
|
|
"epoch": 1.4086134453781511,
|
|
"grad_norm": 18.966908845623266,
|
|
"learning_rate": 6.390259375063714e-06,
|
|
"loss": 1.3329956531524658,
|
|
"step": 2682
|
|
},
|
|
{
|
|
"epoch": 1.409138655462185,
|
|
"grad_norm": 11.926368754198064,
|
|
"learning_rate": 6.387323602651554e-06,
|
|
"loss": 1.5731288194656372,
|
|
"step": 2683
|
|
},
|
|
{
|
|
"epoch": 1.4096638655462184,
|
|
"grad_norm": 14.364387574817396,
|
|
"learning_rate": 6.384387311975124e-06,
|
|
"loss": 1.4846248626708984,
|
|
"step": 2684
|
|
},
|
|
{
|
|
"epoch": 1.4101890756302522,
|
|
"grad_norm": 11.325711220917505,
|
|
"learning_rate": 6.381450504131339e-06,
|
|
"loss": 1.1658028364181519,
|
|
"step": 2685
|
|
},
|
|
{
|
|
"epoch": 1.4107142857142856,
|
|
"grad_norm": 9.454291241719664,
|
|
"learning_rate": 6.378513180217303e-06,
|
|
"loss": 1.37001633644104,
|
|
"step": 2686
|
|
},
|
|
{
|
|
"epoch": 1.4112394957983194,
|
|
"grad_norm": 11.729171590517273,
|
|
"learning_rate": 6.37557534133032e-06,
|
|
"loss": 1.3812564611434937,
|
|
"step": 2687
|
|
},
|
|
{
|
|
"epoch": 1.4117647058823528,
|
|
"grad_norm": 9.110161139375982,
|
|
"learning_rate": 6.3726369885678785e-06,
|
|
"loss": 1.5691630840301514,
|
|
"step": 2688
|
|
},
|
|
{
|
|
"epoch": 1.4122899159663866,
|
|
"grad_norm": 11.666376825432682,
|
|
"learning_rate": 6.369698123027664e-06,
|
|
"loss": 1.8340256214141846,
|
|
"step": 2689
|
|
},
|
|
{
|
|
"epoch": 1.41281512605042,
|
|
"grad_norm": 13.329603843645343,
|
|
"learning_rate": 6.366758745807554e-06,
|
|
"loss": 1.6556529998779297,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 1.4133403361344539,
|
|
"grad_norm": 10.920192684086777,
|
|
"learning_rate": 6.363818858005614e-06,
|
|
"loss": 1.2496334314346313,
|
|
"step": 2691
|
|
},
|
|
{
|
|
"epoch": 1.4138655462184873,
|
|
"grad_norm": 21.030961907191475,
|
|
"learning_rate": 6.360878460720101e-06,
|
|
"loss": 2.0735185146331787,
|
|
"step": 2692
|
|
},
|
|
{
|
|
"epoch": 1.414390756302521,
|
|
"grad_norm": 12.238898954810804,
|
|
"learning_rate": 6.357937555049465e-06,
|
|
"loss": 2.2130722999572754,
|
|
"step": 2693
|
|
},
|
|
{
|
|
"epoch": 1.4149159663865547,
|
|
"grad_norm": 11.893673609094371,
|
|
"learning_rate": 6.354996142092343e-06,
|
|
"loss": 1.3949319124221802,
|
|
"step": 2694
|
|
},
|
|
{
|
|
"epoch": 1.4154411764705883,
|
|
"grad_norm": 6.369629277415591,
|
|
"learning_rate": 6.35205422294756e-06,
|
|
"loss": 1.3924736976623535,
|
|
"step": 2695
|
|
},
|
|
{
|
|
"epoch": 1.415966386554622,
|
|
"grad_norm": 9.636829463557582,
|
|
"learning_rate": 6.349111798714136e-06,
|
|
"loss": 1.8141247034072876,
|
|
"step": 2696
|
|
},
|
|
{
|
|
"epoch": 1.4164915966386555,
|
|
"grad_norm": 15.188161640453659,
|
|
"learning_rate": 6.3461688704912735e-06,
|
|
"loss": 1.7034218311309814,
|
|
"step": 2697
|
|
},
|
|
{
|
|
"epoch": 1.4170168067226891,
|
|
"grad_norm": 12.406060949782468,
|
|
"learning_rate": 6.34322543937837e-06,
|
|
"loss": 2.171898126602173,
|
|
"step": 2698
|
|
},
|
|
{
|
|
"epoch": 1.4175420168067228,
|
|
"grad_norm": 16.879363153798028,
|
|
"learning_rate": 6.340281506475003e-06,
|
|
"loss": 1.3152217864990234,
|
|
"step": 2699
|
|
},
|
|
{
|
|
"epoch": 1.4180672268907564,
|
|
"grad_norm": 11.897718407530666,
|
|
"learning_rate": 6.337337072880942e-06,
|
|
"loss": 2.0754623413085938,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 1.41859243697479,
|
|
"grad_norm": 8.097156028830476,
|
|
"learning_rate": 6.334392139696144e-06,
|
|
"loss": 1.7747178077697754,
|
|
"step": 2701
|
|
},
|
|
{
|
|
"epoch": 1.4191176470588236,
|
|
"grad_norm": 15.835349283719063,
|
|
"learning_rate": 6.331446708020751e-06,
|
|
"loss": 1.4925724267959595,
|
|
"step": 2702
|
|
},
|
|
{
|
|
"epoch": 1.4196428571428572,
|
|
"grad_norm": 8.972494776001891,
|
|
"learning_rate": 6.328500778955091e-06,
|
|
"loss": 1.1682425737380981,
|
|
"step": 2703
|
|
},
|
|
{
|
|
"epoch": 1.4201680672268908,
|
|
"grad_norm": 16.005993054048783,
|
|
"learning_rate": 6.325554353599681e-06,
|
|
"loss": 1.5616768598556519,
|
|
"step": 2704
|
|
},
|
|
{
|
|
"epoch": 1.4206932773109244,
|
|
"grad_norm": 9.53703014155293,
|
|
"learning_rate": 6.322607433055217e-06,
|
|
"loss": 1.3121098279953003,
|
|
"step": 2705
|
|
},
|
|
{
|
|
"epoch": 1.421218487394958,
|
|
"grad_norm": 9.866894377440097,
|
|
"learning_rate": 6.3196600184225875e-06,
|
|
"loss": 1.2491700649261475,
|
|
"step": 2706
|
|
},
|
|
{
|
|
"epoch": 1.4217436974789917,
|
|
"grad_norm": 12.107020031566593,
|
|
"learning_rate": 6.31671211080286e-06,
|
|
"loss": 1.3733625411987305,
|
|
"step": 2707
|
|
},
|
|
{
|
|
"epoch": 1.4222689075630253,
|
|
"grad_norm": 10.488442066499326,
|
|
"learning_rate": 6.31376371129729e-06,
|
|
"loss": 1.749148964881897,
|
|
"step": 2708
|
|
},
|
|
{
|
|
"epoch": 1.4227941176470589,
|
|
"grad_norm": 16.226851146659296,
|
|
"learning_rate": 6.310814821007312e-06,
|
|
"loss": 1.3072221279144287,
|
|
"step": 2709
|
|
},
|
|
{
|
|
"epoch": 1.4233193277310925,
|
|
"grad_norm": 8.781505569756783,
|
|
"learning_rate": 6.3078654410345485e-06,
|
|
"loss": 1.71349036693573,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 1.423844537815126,
|
|
"grad_norm": 10.727948604474648,
|
|
"learning_rate": 6.304915572480803e-06,
|
|
"loss": 1.785348653793335,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 1.4243697478991597,
|
|
"grad_norm": 8.522798288972508,
|
|
"learning_rate": 6.301965216448062e-06,
|
|
"loss": 0.9674264192581177,
|
|
"step": 2712
|
|
},
|
|
{
|
|
"epoch": 1.4248949579831933,
|
|
"grad_norm": 17.51722415753573,
|
|
"learning_rate": 6.299014374038493e-06,
|
|
"loss": 1.4388782978057861,
|
|
"step": 2713
|
|
},
|
|
{
|
|
"epoch": 1.425420168067227,
|
|
"grad_norm": 8.741920039837364,
|
|
"learning_rate": 6.296063046354448e-06,
|
|
"loss": 1.347830057144165,
|
|
"step": 2714
|
|
},
|
|
{
|
|
"epoch": 1.4259453781512605,
|
|
"grad_norm": 9.675295109035934,
|
|
"learning_rate": 6.293111234498456e-06,
|
|
"loss": 1.2614622116088867,
|
|
"step": 2715
|
|
},
|
|
{
|
|
"epoch": 1.4264705882352942,
|
|
"grad_norm": 12.468022455838982,
|
|
"learning_rate": 6.29015893957323e-06,
|
|
"loss": 1.663257360458374,
|
|
"step": 2716
|
|
},
|
|
{
|
|
"epoch": 1.4269957983193278,
|
|
"grad_norm": 8.781387102591754,
|
|
"learning_rate": 6.287206162681663e-06,
|
|
"loss": 1.2915159463882446,
|
|
"step": 2717
|
|
},
|
|
{
|
|
"epoch": 1.4275210084033614,
|
|
"grad_norm": 15.535814336336738,
|
|
"learning_rate": 6.284252904926826e-06,
|
|
"loss": 1.846402645111084,
|
|
"step": 2718
|
|
},
|
|
{
|
|
"epoch": 1.428046218487395,
|
|
"grad_norm": 11.130476806401775,
|
|
"learning_rate": 6.281299167411975e-06,
|
|
"loss": 2.127185344696045,
|
|
"step": 2719
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 21.313055669016336,
|
|
"learning_rate": 6.278344951240537e-06,
|
|
"loss": 1.433441162109375,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 1.4290966386554622,
|
|
"grad_norm": 14.531003249615098,
|
|
"learning_rate": 6.275390257516125e-06,
|
|
"loss": 1.560739517211914,
|
|
"step": 2721
|
|
},
|
|
{
|
|
"epoch": 1.4296218487394958,
|
|
"grad_norm": 12.849787430675377,
|
|
"learning_rate": 6.2724350873425285e-06,
|
|
"loss": 1.3328757286071777,
|
|
"step": 2722
|
|
},
|
|
{
|
|
"epoch": 1.4301470588235294,
|
|
"grad_norm": 9.78905227006667,
|
|
"learning_rate": 6.269479441823712e-06,
|
|
"loss": 2.651552677154541,
|
|
"step": 2723
|
|
},
|
|
{
|
|
"epoch": 1.430672268907563,
|
|
"grad_norm": 7.738392960724227,
|
|
"learning_rate": 6.266523322063821e-06,
|
|
"loss": 1.7567307949066162,
|
|
"step": 2724
|
|
},
|
|
{
|
|
"epoch": 1.4311974789915967,
|
|
"grad_norm": 12.502016160279725,
|
|
"learning_rate": 6.263566729167177e-06,
|
|
"loss": 1.6257787942886353,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 1.4317226890756303,
|
|
"grad_norm": 17.777974679193576,
|
|
"learning_rate": 6.260609664238278e-06,
|
|
"loss": 1.6813796758651733,
|
|
"step": 2726
|
|
},
|
|
{
|
|
"epoch": 1.4322478991596639,
|
|
"grad_norm": 10.9861645693464,
|
|
"learning_rate": 6.257652128381798e-06,
|
|
"loss": 1.2643649578094482,
|
|
"step": 2727
|
|
},
|
|
{
|
|
"epoch": 1.4327731092436975,
|
|
"grad_norm": 13.62052228931387,
|
|
"learning_rate": 6.254694122702589e-06,
|
|
"loss": 0.9569892883300781,
|
|
"step": 2728
|
|
},
|
|
{
|
|
"epoch": 1.433298319327731,
|
|
"grad_norm": 11.133732791010567,
|
|
"learning_rate": 6.251735648305676e-06,
|
|
"loss": 1.6710110902786255,
|
|
"step": 2729
|
|
},
|
|
{
|
|
"epoch": 1.4338235294117647,
|
|
"grad_norm": 12.914654977164414,
|
|
"learning_rate": 6.24877670629626e-06,
|
|
"loss": 1.3672080039978027,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 1.4343487394957983,
|
|
"grad_norm": 12.281255047032186,
|
|
"learning_rate": 6.245817297779716e-06,
|
|
"loss": 2.125563621520996,
|
|
"step": 2731
|
|
},
|
|
{
|
|
"epoch": 1.434873949579832,
|
|
"grad_norm": 9.098304820351398,
|
|
"learning_rate": 6.242857423861597e-06,
|
|
"loss": 1.7239699363708496,
|
|
"step": 2732
|
|
},
|
|
{
|
|
"epoch": 1.4353991596638656,
|
|
"grad_norm": 6.934151955267918,
|
|
"learning_rate": 6.239897085647624e-06,
|
|
"loss": 1.8293339014053345,
|
|
"step": 2733
|
|
},
|
|
{
|
|
"epoch": 1.4359243697478992,
|
|
"grad_norm": 21.139878296146573,
|
|
"learning_rate": 6.236936284243695e-06,
|
|
"loss": 1.8869755268096924,
|
|
"step": 2734
|
|
},
|
|
{
|
|
"epoch": 1.4364495798319328,
|
|
"grad_norm": 8.508829231590704,
|
|
"learning_rate": 6.23397502075588e-06,
|
|
"loss": 1.1078177690505981,
|
|
"step": 2735
|
|
},
|
|
{
|
|
"epoch": 1.4369747899159664,
|
|
"grad_norm": 8.729160781114265,
|
|
"learning_rate": 6.231013296290425e-06,
|
|
"loss": 1.2480807304382324,
|
|
"step": 2736
|
|
},
|
|
{
|
|
"epoch": 1.4375,
|
|
"grad_norm": 13.610444769345298,
|
|
"learning_rate": 6.228051111953742e-06,
|
|
"loss": 2.1868491172790527,
|
|
"step": 2737
|
|
},
|
|
{
|
|
"epoch": 1.4380252100840336,
|
|
"grad_norm": 8.428366000493645,
|
|
"learning_rate": 6.225088468852418e-06,
|
|
"loss": 1.2806732654571533,
|
|
"step": 2738
|
|
},
|
|
{
|
|
"epoch": 1.4385504201680672,
|
|
"grad_norm": 10.604402798905818,
|
|
"learning_rate": 6.222125368093213e-06,
|
|
"loss": 1.1063880920410156,
|
|
"step": 2739
|
|
},
|
|
{
|
|
"epoch": 1.4390756302521008,
|
|
"grad_norm": 31.431759017420447,
|
|
"learning_rate": 6.219161810783057e-06,
|
|
"loss": 2.608151912689209,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 1.4396008403361344,
|
|
"grad_norm": 9.704102880620201,
|
|
"learning_rate": 6.216197798029049e-06,
|
|
"loss": 1.8975062370300293,
|
|
"step": 2741
|
|
},
|
|
{
|
|
"epoch": 1.440126050420168,
|
|
"grad_norm": 8.786043840419737,
|
|
"learning_rate": 6.213233330938456e-06,
|
|
"loss": 1.8726248741149902,
|
|
"step": 2742
|
|
},
|
|
{
|
|
"epoch": 1.4406512605042017,
|
|
"grad_norm": 15.659039793981638,
|
|
"learning_rate": 6.210268410618723e-06,
|
|
"loss": 1.9601554870605469,
|
|
"step": 2743
|
|
},
|
|
{
|
|
"epoch": 1.4411764705882353,
|
|
"grad_norm": 9.081157829260926,
|
|
"learning_rate": 6.207303038177457e-06,
|
|
"loss": 2.228078842163086,
|
|
"step": 2744
|
|
},
|
|
{
|
|
"epoch": 1.441701680672269,
|
|
"grad_norm": 9.77563223858514,
|
|
"learning_rate": 6.204337214722435e-06,
|
|
"loss": 1.5869779586791992,
|
|
"step": 2745
|
|
},
|
|
{
|
|
"epoch": 1.4422268907563025,
|
|
"grad_norm": 8.54218598421352,
|
|
"learning_rate": 6.201370941361603e-06,
|
|
"loss": 2.2011094093322754,
|
|
"step": 2746
|
|
},
|
|
{
|
|
"epoch": 1.4427521008403361,
|
|
"grad_norm": 12.422908355758468,
|
|
"learning_rate": 6.198404219203078e-06,
|
|
"loss": 1.746273398399353,
|
|
"step": 2747
|
|
},
|
|
{
|
|
"epoch": 1.4432773109243697,
|
|
"grad_norm": 10.784065381502472,
|
|
"learning_rate": 6.1954370493551415e-06,
|
|
"loss": 1.5087051391601562,
|
|
"step": 2748
|
|
},
|
|
{
|
|
"epoch": 1.4438025210084033,
|
|
"grad_norm": 7.654085791132907,
|
|
"learning_rate": 6.192469432926241e-06,
|
|
"loss": 1.4601963758468628,
|
|
"step": 2749
|
|
},
|
|
{
|
|
"epoch": 1.444327731092437,
|
|
"grad_norm": 11.055293317789749,
|
|
"learning_rate": 6.189501371024995e-06,
|
|
"loss": 1.2775212526321411,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 1.4448529411764706,
|
|
"grad_norm": 11.233046445606105,
|
|
"learning_rate": 6.186532864760186e-06,
|
|
"loss": 1.5675815343856812,
|
|
"step": 2751
|
|
},
|
|
{
|
|
"epoch": 1.4453781512605042,
|
|
"grad_norm": 9.430242842745319,
|
|
"learning_rate": 6.183563915240763e-06,
|
|
"loss": 1.4484829902648926,
|
|
"step": 2752
|
|
},
|
|
{
|
|
"epoch": 1.4459033613445378,
|
|
"grad_norm": 9.650257345529285,
|
|
"learning_rate": 6.180594523575838e-06,
|
|
"loss": 1.5815004110336304,
|
|
"step": 2753
|
|
},
|
|
{
|
|
"epoch": 1.4464285714285714,
|
|
"grad_norm": 16.290533767329066,
|
|
"learning_rate": 6.177624690874693e-06,
|
|
"loss": 2.012706756591797,
|
|
"step": 2754
|
|
},
|
|
{
|
|
"epoch": 1.446953781512605,
|
|
"grad_norm": 9.52004766857945,
|
|
"learning_rate": 6.174654418246772e-06,
|
|
"loss": 1.7970008850097656,
|
|
"step": 2755
|
|
},
|
|
{
|
|
"epoch": 1.4474789915966386,
|
|
"grad_norm": 11.0420388819629,
|
|
"learning_rate": 6.1716837068016825e-06,
|
|
"loss": 0.7532917261123657,
|
|
"step": 2756
|
|
},
|
|
{
|
|
"epoch": 1.4480042016806722,
|
|
"grad_norm": 9.032300626920227,
|
|
"learning_rate": 6.1687125576491945e-06,
|
|
"loss": 1.4576060771942139,
|
|
"step": 2757
|
|
},
|
|
{
|
|
"epoch": 1.4485294117647058,
|
|
"grad_norm": 7.431941950250911,
|
|
"learning_rate": 6.16574097189925e-06,
|
|
"loss": 1.5218952894210815,
|
|
"step": 2758
|
|
},
|
|
{
|
|
"epoch": 1.4490546218487395,
|
|
"grad_norm": 13.848389559198779,
|
|
"learning_rate": 6.162768950661945e-06,
|
|
"loss": 1.6575257778167725,
|
|
"step": 2759
|
|
},
|
|
{
|
|
"epoch": 1.449579831932773,
|
|
"grad_norm": 10.205416632073112,
|
|
"learning_rate": 6.15979649504754e-06,
|
|
"loss": 2.0593268871307373,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 1.4501050420168067,
|
|
"grad_norm": 14.671209130408167,
|
|
"learning_rate": 6.156823606166461e-06,
|
|
"loss": 1.703036904335022,
|
|
"step": 2761
|
|
},
|
|
{
|
|
"epoch": 1.4506302521008403,
|
|
"grad_norm": 14.195892914638451,
|
|
"learning_rate": 6.153850285129293e-06,
|
|
"loss": 1.147667646408081,
|
|
"step": 2762
|
|
},
|
|
{
|
|
"epoch": 1.451155462184874,
|
|
"grad_norm": 13.269428621486092,
|
|
"learning_rate": 6.150876533046784e-06,
|
|
"loss": 1.3205734491348267,
|
|
"step": 2763
|
|
},
|
|
{
|
|
"epoch": 1.4516806722689075,
|
|
"grad_norm": 9.353869151990617,
|
|
"learning_rate": 6.147902351029842e-06,
|
|
"loss": 1.0645673274993896,
|
|
"step": 2764
|
|
},
|
|
{
|
|
"epoch": 1.4522058823529411,
|
|
"grad_norm": 19.86336156613061,
|
|
"learning_rate": 6.144927740189537e-06,
|
|
"loss": 1.4619238376617432,
|
|
"step": 2765
|
|
},
|
|
{
|
|
"epoch": 1.4527310924369747,
|
|
"grad_norm": 15.934684600251815,
|
|
"learning_rate": 6.141952701637098e-06,
|
|
"loss": 1.5207512378692627,
|
|
"step": 2766
|
|
},
|
|
{
|
|
"epoch": 1.4532563025210083,
|
|
"grad_norm": 8.923483655391978,
|
|
"learning_rate": 6.138977236483912e-06,
|
|
"loss": 1.8451919555664062,
|
|
"step": 2767
|
|
},
|
|
{
|
|
"epoch": 1.453781512605042,
|
|
"grad_norm": 15.248024961440924,
|
|
"learning_rate": 6.1360013458415276e-06,
|
|
"loss": 1.460693120956421,
|
|
"step": 2768
|
|
},
|
|
{
|
|
"epoch": 1.4543067226890756,
|
|
"grad_norm": 8.404726421486403,
|
|
"learning_rate": 6.133025030821656e-06,
|
|
"loss": 2.239863157272339,
|
|
"step": 2769
|
|
},
|
|
{
|
|
"epoch": 1.4548319327731092,
|
|
"grad_norm": 13.586024669612279,
|
|
"learning_rate": 6.130048292536158e-06,
|
|
"loss": 1.5917932987213135,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 1.4553571428571428,
|
|
"grad_norm": 8.033148961170872,
|
|
"learning_rate": 6.127071132097061e-06,
|
|
"loss": 1.2371349334716797,
|
|
"step": 2771
|
|
},
|
|
{
|
|
"epoch": 1.4558823529411764,
|
|
"grad_norm": 13.998857978072763,
|
|
"learning_rate": 6.124093550616544e-06,
|
|
"loss": 1.7588425874710083,
|
|
"step": 2772
|
|
},
|
|
{
|
|
"epoch": 1.45640756302521,
|
|
"grad_norm": 17.617515653278907,
|
|
"learning_rate": 6.12111554920695e-06,
|
|
"loss": 1.9410431385040283,
|
|
"step": 2773
|
|
},
|
|
{
|
|
"epoch": 1.4569327731092436,
|
|
"grad_norm": 9.161409370597044,
|
|
"learning_rate": 6.118137128980771e-06,
|
|
"loss": 0.9624344706535339,
|
|
"step": 2774
|
|
},
|
|
{
|
|
"epoch": 1.4574579831932772,
|
|
"grad_norm": 9.52809010695272,
|
|
"learning_rate": 6.11515829105066e-06,
|
|
"loss": 1.4326848983764648,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 1.4579831932773109,
|
|
"grad_norm": 10.023203255773875,
|
|
"learning_rate": 6.112179036529426e-06,
|
|
"loss": 1.4362763166427612,
|
|
"step": 2776
|
|
},
|
|
{
|
|
"epoch": 1.4585084033613445,
|
|
"grad_norm": 12.801240692566088,
|
|
"learning_rate": 6.1091993665300354e-06,
|
|
"loss": 1.397044062614441,
|
|
"step": 2777
|
|
},
|
|
{
|
|
"epoch": 1.459033613445378,
|
|
"grad_norm": 15.212209550852911,
|
|
"learning_rate": 6.106219282165603e-06,
|
|
"loss": 1.1173803806304932,
|
|
"step": 2778
|
|
},
|
|
{
|
|
"epoch": 1.4595588235294117,
|
|
"grad_norm": 10.63706747298648,
|
|
"learning_rate": 6.103238784549404e-06,
|
|
"loss": 1.6797499656677246,
|
|
"step": 2779
|
|
},
|
|
{
|
|
"epoch": 1.4600840336134453,
|
|
"grad_norm": 7.368362917754149,
|
|
"learning_rate": 6.1002578747948686e-06,
|
|
"loss": 1.9404041767120361,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 1.460609243697479,
|
|
"grad_norm": 16.695431205021084,
|
|
"learning_rate": 6.0972765540155764e-06,
|
|
"loss": 1.6921062469482422,
|
|
"step": 2781
|
|
},
|
|
{
|
|
"epoch": 1.4611344537815127,
|
|
"grad_norm": 18.507486596298616,
|
|
"learning_rate": 6.0942948233252655e-06,
|
|
"loss": 1.4276188611984253,
|
|
"step": 2782
|
|
},
|
|
{
|
|
"epoch": 1.4616596638655461,
|
|
"grad_norm": 13.979346214698044,
|
|
"learning_rate": 6.091312683837823e-06,
|
|
"loss": 1.2384809255599976,
|
|
"step": 2783
|
|
},
|
|
{
|
|
"epoch": 1.46218487394958,
|
|
"grad_norm": 11.236858096804077,
|
|
"learning_rate": 6.088330136667294e-06,
|
|
"loss": 1.4504996538162231,
|
|
"step": 2784
|
|
},
|
|
{
|
|
"epoch": 1.4627100840336134,
|
|
"grad_norm": 9.754105170854729,
|
|
"learning_rate": 6.08534718292787e-06,
|
|
"loss": 2.2792468070983887,
|
|
"step": 2785
|
|
},
|
|
{
|
|
"epoch": 1.4632352941176472,
|
|
"grad_norm": 10.261888445214334,
|
|
"learning_rate": 6.082363823733897e-06,
|
|
"loss": 1.542618751525879,
|
|
"step": 2786
|
|
},
|
|
{
|
|
"epoch": 1.4637605042016806,
|
|
"grad_norm": 15.713836107112463,
|
|
"learning_rate": 6.07938006019987e-06,
|
|
"loss": 1.386710524559021,
|
|
"step": 2787
|
|
},
|
|
{
|
|
"epoch": 1.4642857142857144,
|
|
"grad_norm": 10.27332010013314,
|
|
"learning_rate": 6.076395893440442e-06,
|
|
"loss": 1.9322963953018188,
|
|
"step": 2788
|
|
},
|
|
{
|
|
"epoch": 1.4648109243697478,
|
|
"grad_norm": 12.404609035213884,
|
|
"learning_rate": 6.07341132457041e-06,
|
|
"loss": 1.8743345737457275,
|
|
"step": 2789
|
|
},
|
|
{
|
|
"epoch": 1.4653361344537816,
|
|
"grad_norm": 9.417357232035894,
|
|
"learning_rate": 6.070426354704723e-06,
|
|
"loss": 1.754909634590149,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 1.465861344537815,
|
|
"grad_norm": 9.593555949937741,
|
|
"learning_rate": 6.067440984958479e-06,
|
|
"loss": 1.671936273574829,
|
|
"step": 2791
|
|
},
|
|
{
|
|
"epoch": 1.4663865546218489,
|
|
"grad_norm": 7.774702693146011,
|
|
"learning_rate": 6.064455216446929e-06,
|
|
"loss": 1.509257435798645,
|
|
"step": 2792
|
|
},
|
|
{
|
|
"epoch": 1.4669117647058822,
|
|
"grad_norm": 17.78768586767669,
|
|
"learning_rate": 6.061469050285469e-06,
|
|
"loss": 1.269576072692871,
|
|
"step": 2793
|
|
},
|
|
{
|
|
"epoch": 1.467436974789916,
|
|
"grad_norm": 13.45180608305342,
|
|
"learning_rate": 6.058482487589644e-06,
|
|
"loss": 1.6840946674346924,
|
|
"step": 2794
|
|
},
|
|
{
|
|
"epoch": 1.4679621848739495,
|
|
"grad_norm": 7.7412920982889295,
|
|
"learning_rate": 6.055495529475149e-06,
|
|
"loss": 1.9168556928634644,
|
|
"step": 2795
|
|
},
|
|
{
|
|
"epoch": 1.4684873949579833,
|
|
"grad_norm": 7.294054716643995,
|
|
"learning_rate": 6.0525081770578265e-06,
|
|
"loss": 1.2719926834106445,
|
|
"step": 2796
|
|
},
|
|
{
|
|
"epoch": 1.4690126050420167,
|
|
"grad_norm": 11.068267519959369,
|
|
"learning_rate": 6.049520431453666e-06,
|
|
"loss": 1.2731499671936035,
|
|
"step": 2797
|
|
},
|
|
{
|
|
"epoch": 1.4695378151260505,
|
|
"grad_norm": 9.807784474363542,
|
|
"learning_rate": 6.0465322937788e-06,
|
|
"loss": 1.6818287372589111,
|
|
"step": 2798
|
|
},
|
|
{
|
|
"epoch": 1.470063025210084,
|
|
"grad_norm": 12.181562084315226,
|
|
"learning_rate": 6.043543765149514e-06,
|
|
"loss": 1.31424880027771,
|
|
"step": 2799
|
|
},
|
|
{
|
|
"epoch": 1.4705882352941178,
|
|
"grad_norm": 9.106684508139276,
|
|
"learning_rate": 6.040554846682237e-06,
|
|
"loss": 2.372061252593994,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 1.4711134453781511,
|
|
"grad_norm": 9.874967096732872,
|
|
"learning_rate": 6.037565539493542e-06,
|
|
"loss": 1.5737472772598267,
|
|
"step": 2801
|
|
},
|
|
{
|
|
"epoch": 1.471638655462185,
|
|
"grad_norm": 8.045375010534565,
|
|
"learning_rate": 6.034575844700148e-06,
|
|
"loss": 1.7884564399719238,
|
|
"step": 2802
|
|
},
|
|
{
|
|
"epoch": 1.4721638655462184,
|
|
"grad_norm": 11.35333671438149,
|
|
"learning_rate": 6.031585763418919e-06,
|
|
"loss": 1.6468238830566406,
|
|
"step": 2803
|
|
},
|
|
{
|
|
"epoch": 1.4726890756302522,
|
|
"grad_norm": 8.28167539536036,
|
|
"learning_rate": 6.028595296766865e-06,
|
|
"loss": 1.7392634153366089,
|
|
"step": 2804
|
|
},
|
|
{
|
|
"epoch": 1.4732142857142856,
|
|
"grad_norm": 8.540889665677351,
|
|
"learning_rate": 6.025604445861137e-06,
|
|
"loss": 1.6154978275299072,
|
|
"step": 2805
|
|
},
|
|
{
|
|
"epoch": 1.4737394957983194,
|
|
"grad_norm": 15.244155133260243,
|
|
"learning_rate": 6.02261321181903e-06,
|
|
"loss": 1.1525774002075195,
|
|
"step": 2806
|
|
},
|
|
{
|
|
"epoch": 1.4742647058823528,
|
|
"grad_norm": 15.04475936959345,
|
|
"learning_rate": 6.019621595757987e-06,
|
|
"loss": 2.7511398792266846,
|
|
"step": 2807
|
|
},
|
|
{
|
|
"epoch": 1.4747899159663866,
|
|
"grad_norm": 8.409463833082263,
|
|
"learning_rate": 6.016629598795587e-06,
|
|
"loss": 2.1453425884246826,
|
|
"step": 2808
|
|
},
|
|
{
|
|
"epoch": 1.47531512605042,
|
|
"grad_norm": 10.467846936437327,
|
|
"learning_rate": 6.013637222049554e-06,
|
|
"loss": 1.3417030572891235,
|
|
"step": 2809
|
|
},
|
|
{
|
|
"epoch": 1.4758403361344539,
|
|
"grad_norm": 18.74141609993802,
|
|
"learning_rate": 6.010644466637756e-06,
|
|
"loss": 1.423673152923584,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 1.4763655462184873,
|
|
"grad_norm": 11.343846267002052,
|
|
"learning_rate": 6.007651333678199e-06,
|
|
"loss": 1.3917582035064697,
|
|
"step": 2811
|
|
},
|
|
{
|
|
"epoch": 1.476890756302521,
|
|
"grad_norm": 11.211814827240355,
|
|
"learning_rate": 6.004657824289031e-06,
|
|
"loss": 1.8203171491622925,
|
|
"step": 2812
|
|
},
|
|
{
|
|
"epoch": 1.4774159663865547,
|
|
"grad_norm": 8.32431234549036,
|
|
"learning_rate": 6.0016639395885424e-06,
|
|
"loss": 1.677531123161316,
|
|
"step": 2813
|
|
},
|
|
{
|
|
"epoch": 1.4779411764705883,
|
|
"grad_norm": 9.081860364888588,
|
|
"learning_rate": 5.9986696806951625e-06,
|
|
"loss": 1.5137662887573242,
|
|
"step": 2814
|
|
},
|
|
{
|
|
"epoch": 1.478466386554622,
|
|
"grad_norm": 7.08997854765275,
|
|
"learning_rate": 5.995675048727461e-06,
|
|
"loss": 1.5967217683792114,
|
|
"step": 2815
|
|
},
|
|
{
|
|
"epoch": 1.4789915966386555,
|
|
"grad_norm": 11.620168337811307,
|
|
"learning_rate": 5.9926800448041446e-06,
|
|
"loss": 2.290982723236084,
|
|
"step": 2816
|
|
},
|
|
{
|
|
"epoch": 1.4795168067226891,
|
|
"grad_norm": 9.169178568275145,
|
|
"learning_rate": 5.98968467004406e-06,
|
|
"loss": 1.9395719766616821,
|
|
"step": 2817
|
|
},
|
|
{
|
|
"epoch": 1.4800420168067228,
|
|
"grad_norm": 19.43973270209733,
|
|
"learning_rate": 5.986688925566198e-06,
|
|
"loss": 1.914243221282959,
|
|
"step": 2818
|
|
},
|
|
{
|
|
"epoch": 1.4805672268907564,
|
|
"grad_norm": 14.79213281848686,
|
|
"learning_rate": 5.983692812489679e-06,
|
|
"loss": 1.8757904767990112,
|
|
"step": 2819
|
|
},
|
|
{
|
|
"epoch": 1.48109243697479,
|
|
"grad_norm": 6.623780138555651,
|
|
"learning_rate": 5.980696331933764e-06,
|
|
"loss": 1.7306898832321167,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 1.4816176470588236,
|
|
"grad_norm": 14.665166895474977,
|
|
"learning_rate": 5.977699485017855e-06,
|
|
"loss": 1.2540236711502075,
|
|
"step": 2821
|
|
},
|
|
{
|
|
"epoch": 1.4821428571428572,
|
|
"grad_norm": 8.833773573076922,
|
|
"learning_rate": 5.974702272861487e-06,
|
|
"loss": 1.315044641494751,
|
|
"step": 2822
|
|
},
|
|
{
|
|
"epoch": 1.4826680672268908,
|
|
"grad_norm": 10.48223864739591,
|
|
"learning_rate": 5.971704696584332e-06,
|
|
"loss": 1.7251434326171875,
|
|
"step": 2823
|
|
},
|
|
{
|
|
"epoch": 1.4831932773109244,
|
|
"grad_norm": 11.048554063714633,
|
|
"learning_rate": 5.9687067573061965e-06,
|
|
"loss": 1.7319231033325195,
|
|
"step": 2824
|
|
},
|
|
{
|
|
"epoch": 1.483718487394958,
|
|
"grad_norm": 8.38825084535047,
|
|
"learning_rate": 5.965708456147028e-06,
|
|
"loss": 1.8665804862976074,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 1.4842436974789917,
|
|
"grad_norm": 7.369771367824362,
|
|
"learning_rate": 5.962709794226905e-06,
|
|
"loss": 0.691155731678009,
|
|
"step": 2826
|
|
},
|
|
{
|
|
"epoch": 1.4847689075630253,
|
|
"grad_norm": 14.073045570916406,
|
|
"learning_rate": 5.959710772666041e-06,
|
|
"loss": 1.813007116317749,
|
|
"step": 2827
|
|
},
|
|
{
|
|
"epoch": 1.4852941176470589,
|
|
"grad_norm": 9.572866240140566,
|
|
"learning_rate": 5.956711392584782e-06,
|
|
"loss": 2.897813558578491,
|
|
"step": 2828
|
|
},
|
|
{
|
|
"epoch": 1.4858193277310925,
|
|
"grad_norm": 16.91968264850313,
|
|
"learning_rate": 5.953711655103615e-06,
|
|
"loss": 1.4889321327209473,
|
|
"step": 2829
|
|
},
|
|
{
|
|
"epoch": 1.486344537815126,
|
|
"grad_norm": 9.994243480776813,
|
|
"learning_rate": 5.950711561343152e-06,
|
|
"loss": 1.1888728141784668,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 1.4868697478991597,
|
|
"grad_norm": 18.090993697846038,
|
|
"learning_rate": 5.947711112424142e-06,
|
|
"loss": 2.311788558959961,
|
|
"step": 2831
|
|
},
|
|
{
|
|
"epoch": 1.4873949579831933,
|
|
"grad_norm": 10.698621788325813,
|
|
"learning_rate": 5.94471030946747e-06,
|
|
"loss": 1.3138668537139893,
|
|
"step": 2832
|
|
},
|
|
{
|
|
"epoch": 1.487920168067227,
|
|
"grad_norm": 15.754072248005881,
|
|
"learning_rate": 5.941709153594146e-06,
|
|
"loss": 1.4495904445648193,
|
|
"step": 2833
|
|
},
|
|
{
|
|
"epoch": 1.4884453781512605,
|
|
"grad_norm": 15.435243495547523,
|
|
"learning_rate": 5.9387076459253175e-06,
|
|
"loss": 1.3696357011795044,
|
|
"step": 2834
|
|
},
|
|
{
|
|
"epoch": 1.4889705882352942,
|
|
"grad_norm": 9.970533354540235,
|
|
"learning_rate": 5.935705787582261e-06,
|
|
"loss": 1.2953077554702759,
|
|
"step": 2835
|
|
},
|
|
{
|
|
"epoch": 1.4894957983193278,
|
|
"grad_norm": 11.608649036564305,
|
|
"learning_rate": 5.932703579686385e-06,
|
|
"loss": 2.515267848968506,
|
|
"step": 2836
|
|
},
|
|
{
|
|
"epoch": 1.4900210084033614,
|
|
"grad_norm": 11.861396337213627,
|
|
"learning_rate": 5.92970102335923e-06,
|
|
"loss": 1.2390263080596924,
|
|
"step": 2837
|
|
},
|
|
{
|
|
"epoch": 1.490546218487395,
|
|
"grad_norm": 10.330756083096254,
|
|
"learning_rate": 5.9266981197224615e-06,
|
|
"loss": 1.96694016456604,
|
|
"step": 2838
|
|
},
|
|
{
|
|
"epoch": 1.4910714285714286,
|
|
"grad_norm": 9.1598301030574,
|
|
"learning_rate": 5.923694869897879e-06,
|
|
"loss": 2.5066561698913574,
|
|
"step": 2839
|
|
},
|
|
{
|
|
"epoch": 1.4915966386554622,
|
|
"grad_norm": 9.009991462237936,
|
|
"learning_rate": 5.920691275007412e-06,
|
|
"loss": 1.6626849174499512,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 1.4921218487394958,
|
|
"grad_norm": 14.128976680056349,
|
|
"learning_rate": 5.917687336173116e-06,
|
|
"loss": 1.5072438716888428,
|
|
"step": 2841
|
|
},
|
|
{
|
|
"epoch": 1.4926470588235294,
|
|
"grad_norm": 13.988741293808523,
|
|
"learning_rate": 5.914683054517176e-06,
|
|
"loss": 2.1789119243621826,
|
|
"step": 2842
|
|
},
|
|
{
|
|
"epoch": 1.493172268907563,
|
|
"grad_norm": 10.173288182889692,
|
|
"learning_rate": 5.911678431161907e-06,
|
|
"loss": 1.802624225616455,
|
|
"step": 2843
|
|
},
|
|
{
|
|
"epoch": 1.4936974789915967,
|
|
"grad_norm": 11.26877200199326,
|
|
"learning_rate": 5.908673467229749e-06,
|
|
"loss": 1.7642006874084473,
|
|
"step": 2844
|
|
},
|
|
{
|
|
"epoch": 1.4942226890756303,
|
|
"grad_norm": 19.46610092594002,
|
|
"learning_rate": 5.905668163843269e-06,
|
|
"loss": 1.7786991596221924,
|
|
"step": 2845
|
|
},
|
|
{
|
|
"epoch": 1.4947478991596639,
|
|
"grad_norm": 18.165207809056835,
|
|
"learning_rate": 5.902662522125163e-06,
|
|
"loss": 1.3276612758636475,
|
|
"step": 2846
|
|
},
|
|
{
|
|
"epoch": 1.4952731092436975,
|
|
"grad_norm": 12.780426008235525,
|
|
"learning_rate": 5.899656543198254e-06,
|
|
"loss": 1.3048717975616455,
|
|
"step": 2847
|
|
},
|
|
{
|
|
"epoch": 1.495798319327731,
|
|
"grad_norm": 12.545158971853072,
|
|
"learning_rate": 5.8966502281854885e-06,
|
|
"loss": 2.6292881965637207,
|
|
"step": 2848
|
|
},
|
|
{
|
|
"epoch": 1.4963235294117647,
|
|
"grad_norm": 14.0427189863656,
|
|
"learning_rate": 5.893643578209939e-06,
|
|
"loss": 1.301329493522644,
|
|
"step": 2849
|
|
},
|
|
{
|
|
"epoch": 1.4968487394957983,
|
|
"grad_norm": 14.90071697810995,
|
|
"learning_rate": 5.890636594394803e-06,
|
|
"loss": 1.9319939613342285,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 1.497373949579832,
|
|
"grad_norm": 10.235982684128693,
|
|
"learning_rate": 5.887629277863405e-06,
|
|
"loss": 1.2810033559799194,
|
|
"step": 2851
|
|
},
|
|
{
|
|
"epoch": 1.4978991596638656,
|
|
"grad_norm": 11.051027681332855,
|
|
"learning_rate": 5.884621629739191e-06,
|
|
"loss": 1.858025074005127,
|
|
"step": 2852
|
|
},
|
|
{
|
|
"epoch": 1.4984243697478992,
|
|
"grad_norm": 11.768480745372527,
|
|
"learning_rate": 5.881613651145732e-06,
|
|
"loss": 1.4819607734680176,
|
|
"step": 2853
|
|
},
|
|
{
|
|
"epoch": 1.4989495798319328,
|
|
"grad_norm": 10.47462659426203,
|
|
"learning_rate": 5.878605343206722e-06,
|
|
"loss": 1.3072776794433594,
|
|
"step": 2854
|
|
},
|
|
{
|
|
"epoch": 1.4994747899159664,
|
|
"grad_norm": 9.485016559187,
|
|
"learning_rate": 5.875596707045982e-06,
|
|
"loss": 1.3459935188293457,
|
|
"step": 2855
|
|
},
|
|
{
|
|
"epoch": 1.5,
|
|
"grad_norm": 10.238123655792794,
|
|
"learning_rate": 5.872587743787447e-06,
|
|
"loss": 1.3451051712036133,
|
|
"step": 2856
|
|
},
|
|
{
|
|
"epoch": 1.5005252100840336,
|
|
"grad_norm": 17.508158441331556,
|
|
"learning_rate": 5.8695784545551815e-06,
|
|
"loss": 1.6816987991333008,
|
|
"step": 2857
|
|
},
|
|
{
|
|
"epoch": 1.5010504201680672,
|
|
"grad_norm": 13.919394577120519,
|
|
"learning_rate": 5.86656884047337e-06,
|
|
"loss": 2.0135819911956787,
|
|
"step": 2858
|
|
},
|
|
{
|
|
"epoch": 1.5015756302521008,
|
|
"grad_norm": 9.85490795789548,
|
|
"learning_rate": 5.863558902666318e-06,
|
|
"loss": 1.4393882751464844,
|
|
"step": 2859
|
|
},
|
|
{
|
|
"epoch": 1.5021008403361344,
|
|
"grad_norm": 12.027119139841197,
|
|
"learning_rate": 5.860548642258451e-06,
|
|
"loss": 1.8411206007003784,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 1.502626050420168,
|
|
"grad_norm": 11.578607467451782,
|
|
"learning_rate": 5.8575380603743155e-06,
|
|
"loss": 1.3521913290023804,
|
|
"step": 2861
|
|
},
|
|
{
|
|
"epoch": 1.5031512605042017,
|
|
"grad_norm": 15.786064218086343,
|
|
"learning_rate": 5.85452715813858e-06,
|
|
"loss": 1.388970971107483,
|
|
"step": 2862
|
|
},
|
|
{
|
|
"epoch": 1.5036764705882353,
|
|
"grad_norm": 20.27257774565252,
|
|
"learning_rate": 5.851515936676031e-06,
|
|
"loss": 1.084679126739502,
|
|
"step": 2863
|
|
},
|
|
{
|
|
"epoch": 1.504201680672269,
|
|
"grad_norm": 14.026872328523538,
|
|
"learning_rate": 5.848504397111573e-06,
|
|
"loss": 1.4439821243286133,
|
|
"step": 2864
|
|
},
|
|
{
|
|
"epoch": 1.5047268907563025,
|
|
"grad_norm": 38.96072770612892,
|
|
"learning_rate": 5.8454925405702326e-06,
|
|
"loss": 1.7968759536743164,
|
|
"step": 2865
|
|
},
|
|
{
|
|
"epoch": 1.5052521008403361,
|
|
"grad_norm": 23.567587821248285,
|
|
"learning_rate": 5.8424803681771505e-06,
|
|
"loss": 1.5943264961242676,
|
|
"step": 2866
|
|
},
|
|
{
|
|
"epoch": 1.5057773109243697,
|
|
"grad_norm": 19.27584586072049,
|
|
"learning_rate": 5.83946788105759e-06,
|
|
"loss": 1.861040472984314,
|
|
"step": 2867
|
|
},
|
|
{
|
|
"epoch": 1.5063025210084033,
|
|
"grad_norm": 11.820467374303973,
|
|
"learning_rate": 5.836455080336929e-06,
|
|
"loss": 1.2037864923477173,
|
|
"step": 2868
|
|
},
|
|
{
|
|
"epoch": 1.506827731092437,
|
|
"grad_norm": 15.137479556828536,
|
|
"learning_rate": 5.833441967140662e-06,
|
|
"loss": 1.1817970275878906,
|
|
"step": 2869
|
|
},
|
|
{
|
|
"epoch": 1.5073529411764706,
|
|
"grad_norm": 8.516086510212022,
|
|
"learning_rate": 5.830428542594404e-06,
|
|
"loss": 1.6967062950134277,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 1.5078781512605042,
|
|
"grad_norm": 10.560017119280444,
|
|
"learning_rate": 5.827414807823884e-06,
|
|
"loss": 1.0605194568634033,
|
|
"step": 2871
|
|
},
|
|
{
|
|
"epoch": 1.5084033613445378,
|
|
"grad_norm": 10.834636238912983,
|
|
"learning_rate": 5.824400763954944e-06,
|
|
"loss": 2.2708017826080322,
|
|
"step": 2872
|
|
},
|
|
{
|
|
"epoch": 1.5089285714285714,
|
|
"grad_norm": 11.979837739536155,
|
|
"learning_rate": 5.821386412113546e-06,
|
|
"loss": 2.6514854431152344,
|
|
"step": 2873
|
|
},
|
|
{
|
|
"epoch": 1.509453781512605,
|
|
"grad_norm": 19.12069884389941,
|
|
"learning_rate": 5.818371753425764e-06,
|
|
"loss": 1.6612210273742676,
|
|
"step": 2874
|
|
},
|
|
{
|
|
"epoch": 1.5099789915966386,
|
|
"grad_norm": 9.907309346645576,
|
|
"learning_rate": 5.815356789017791e-06,
|
|
"loss": 1.5609991550445557,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 1.5105042016806722,
|
|
"grad_norm": 14.457357561212142,
|
|
"learning_rate": 5.812341520015929e-06,
|
|
"loss": 2.3147692680358887,
|
|
"step": 2876
|
|
},
|
|
{
|
|
"epoch": 1.5110294117647058,
|
|
"grad_norm": 9.089480918573374,
|
|
"learning_rate": 5.809325947546596e-06,
|
|
"loss": 1.7276939153671265,
|
|
"step": 2877
|
|
},
|
|
{
|
|
"epoch": 1.5115546218487395,
|
|
"grad_norm": 8.081566611738515,
|
|
"learning_rate": 5.806310072736323e-06,
|
|
"loss": 1.9004578590393066,
|
|
"step": 2878
|
|
},
|
|
{
|
|
"epoch": 1.512079831932773,
|
|
"grad_norm": 13.897502533404047,
|
|
"learning_rate": 5.803293896711756e-06,
|
|
"loss": 1.6075999736785889,
|
|
"step": 2879
|
|
},
|
|
{
|
|
"epoch": 1.5126050420168067,
|
|
"grad_norm": 10.021929429682709,
|
|
"learning_rate": 5.800277420599649e-06,
|
|
"loss": 2.075315475463867,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 1.5131302521008403,
|
|
"grad_norm": 9.306843698596571,
|
|
"learning_rate": 5.797260645526873e-06,
|
|
"loss": 1.6688923835754395,
|
|
"step": 2881
|
|
},
|
|
{
|
|
"epoch": 1.513655462184874,
|
|
"grad_norm": 10.036367052729391,
|
|
"learning_rate": 5.794243572620408e-06,
|
|
"loss": 1.193693995475769,
|
|
"step": 2882
|
|
},
|
|
{
|
|
"epoch": 1.5141806722689075,
|
|
"grad_norm": 10.740232814889865,
|
|
"learning_rate": 5.791226203007346e-06,
|
|
"loss": 2.004913091659546,
|
|
"step": 2883
|
|
},
|
|
{
|
|
"epoch": 1.5147058823529411,
|
|
"grad_norm": 11.999598724419366,
|
|
"learning_rate": 5.788208537814889e-06,
|
|
"loss": 1.4250155687332153,
|
|
"step": 2884
|
|
},
|
|
{
|
|
"epoch": 1.5152310924369747,
|
|
"grad_norm": 12.223837924953441,
|
|
"learning_rate": 5.785190578170351e-06,
|
|
"loss": 1.5953729152679443,
|
|
"step": 2885
|
|
},
|
|
{
|
|
"epoch": 1.5157563025210083,
|
|
"grad_norm": 24.216139786646707,
|
|
"learning_rate": 5.782172325201155e-06,
|
|
"loss": 1.6154977083206177,
|
|
"step": 2886
|
|
},
|
|
{
|
|
"epoch": 1.5162815126050422,
|
|
"grad_norm": 7.471918874765334,
|
|
"learning_rate": 5.779153780034833e-06,
|
|
"loss": 1.424997329711914,
|
|
"step": 2887
|
|
},
|
|
{
|
|
"epoch": 1.5168067226890756,
|
|
"grad_norm": 13.808427928663413,
|
|
"learning_rate": 5.7761349437990255e-06,
|
|
"loss": 1.3849037885665894,
|
|
"step": 2888
|
|
},
|
|
{
|
|
"epoch": 1.5173319327731094,
|
|
"grad_norm": 9.123275417455284,
|
|
"learning_rate": 5.773115817621487e-06,
|
|
"loss": 1.92661714553833,
|
|
"step": 2889
|
|
},
|
|
{
|
|
"epoch": 1.5178571428571428,
|
|
"grad_norm": 13.72838263775455,
|
|
"learning_rate": 5.770096402630073e-06,
|
|
"loss": 1.0330466032028198,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 1.5183823529411766,
|
|
"grad_norm": 7.655300808201822,
|
|
"learning_rate": 5.767076699952751e-06,
|
|
"loss": 1.8486248254776,
|
|
"step": 2891
|
|
},
|
|
{
|
|
"epoch": 1.51890756302521,
|
|
"grad_norm": 7.8014609929651435,
|
|
"learning_rate": 5.764056710717596e-06,
|
|
"loss": 1.4996156692504883,
|
|
"step": 2892
|
|
},
|
|
{
|
|
"epoch": 1.5194327731092439,
|
|
"grad_norm": 12.039568608523037,
|
|
"learning_rate": 5.761036436052788e-06,
|
|
"loss": 1.8443527221679688,
|
|
"step": 2893
|
|
},
|
|
{
|
|
"epoch": 1.5199579831932772,
|
|
"grad_norm": 8.659391542247635,
|
|
"learning_rate": 5.758015877086616e-06,
|
|
"loss": 1.3554350137710571,
|
|
"step": 2894
|
|
},
|
|
{
|
|
"epoch": 1.520483193277311,
|
|
"grad_norm": 12.95903705623489,
|
|
"learning_rate": 5.754995034947474e-06,
|
|
"loss": 1.8053975105285645,
|
|
"step": 2895
|
|
},
|
|
{
|
|
"epoch": 1.5210084033613445,
|
|
"grad_norm": 9.327048639656558,
|
|
"learning_rate": 5.751973910763862e-06,
|
|
"loss": 1.6848835945129395,
|
|
"step": 2896
|
|
},
|
|
{
|
|
"epoch": 1.5215336134453783,
|
|
"grad_norm": 19.302375729414894,
|
|
"learning_rate": 5.748952505664385e-06,
|
|
"loss": 3.980196952819824,
|
|
"step": 2897
|
|
},
|
|
{
|
|
"epoch": 1.5220588235294117,
|
|
"grad_norm": 18.66316165606761,
|
|
"learning_rate": 5.745930820777753e-06,
|
|
"loss": 1.6223571300506592,
|
|
"step": 2898
|
|
},
|
|
{
|
|
"epoch": 1.5225840336134455,
|
|
"grad_norm": 12.734923210269024,
|
|
"learning_rate": 5.74290885723278e-06,
|
|
"loss": 1.6548211574554443,
|
|
"step": 2899
|
|
},
|
|
{
|
|
"epoch": 1.523109243697479,
|
|
"grad_norm": 7.1223652189520665,
|
|
"learning_rate": 5.739886616158386e-06,
|
|
"loss": 0.8713272213935852,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 1.5236344537815127,
|
|
"grad_norm": 9.470825827621523,
|
|
"learning_rate": 5.736864098683595e-06,
|
|
"loss": 2.23360538482666,
|
|
"step": 2901
|
|
},
|
|
{
|
|
"epoch": 1.5241596638655461,
|
|
"grad_norm": 8.751133349958469,
|
|
"learning_rate": 5.7338413059375285e-06,
|
|
"loss": 1.426957130432129,
|
|
"step": 2902
|
|
},
|
|
{
|
|
"epoch": 1.52468487394958,
|
|
"grad_norm": 13.733697650918987,
|
|
"learning_rate": 5.7308182390494185e-06,
|
|
"loss": 1.4930875301361084,
|
|
"step": 2903
|
|
},
|
|
{
|
|
"epoch": 1.5252100840336134,
|
|
"grad_norm": 12.982363979908152,
|
|
"learning_rate": 5.727794899148596e-06,
|
|
"loss": 1.4669435024261475,
|
|
"step": 2904
|
|
},
|
|
{
|
|
"epoch": 1.5257352941176472,
|
|
"grad_norm": 15.848490300949237,
|
|
"learning_rate": 5.724771287364492e-06,
|
|
"loss": 1.7891262769699097,
|
|
"step": 2905
|
|
},
|
|
{
|
|
"epoch": 1.5262605042016806,
|
|
"grad_norm": 12.59163818717848,
|
|
"learning_rate": 5.721747404826641e-06,
|
|
"loss": 1.8997935056686401,
|
|
"step": 2906
|
|
},
|
|
{
|
|
"epoch": 1.5267857142857144,
|
|
"grad_norm": 15.280794260801414,
|
|
"learning_rate": 5.718723252664682e-06,
|
|
"loss": 1.4946014881134033,
|
|
"step": 2907
|
|
},
|
|
{
|
|
"epoch": 1.5273109243697478,
|
|
"grad_norm": 9.493566868179185,
|
|
"learning_rate": 5.7156988320083485e-06,
|
|
"loss": 1.504456877708435,
|
|
"step": 2908
|
|
},
|
|
{
|
|
"epoch": 1.5278361344537816,
|
|
"grad_norm": 11.039979166730305,
|
|
"learning_rate": 5.712674143987478e-06,
|
|
"loss": 1.36366868019104,
|
|
"step": 2909
|
|
},
|
|
{
|
|
"epoch": 1.528361344537815,
|
|
"grad_norm": 9.266488878179008,
|
|
"learning_rate": 5.709649189732006e-06,
|
|
"loss": 1.622373104095459,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 1.5288865546218489,
|
|
"grad_norm": 13.513185773637307,
|
|
"learning_rate": 5.706623970371972e-06,
|
|
"loss": 1.2717863321304321,
|
|
"step": 2911
|
|
},
|
|
{
|
|
"epoch": 1.5294117647058822,
|
|
"grad_norm": 20.433933596775127,
|
|
"learning_rate": 5.7035984870375075e-06,
|
|
"loss": 1.8660664558410645,
|
|
"step": 2912
|
|
},
|
|
{
|
|
"epoch": 1.529936974789916,
|
|
"grad_norm": 18.23207759654784,
|
|
"learning_rate": 5.700572740858847e-06,
|
|
"loss": 1.1541085243225098,
|
|
"step": 2913
|
|
},
|
|
{
|
|
"epoch": 1.5304621848739495,
|
|
"grad_norm": 13.14098786673324,
|
|
"learning_rate": 5.697546732966323e-06,
|
|
"loss": 0.860084056854248,
|
|
"step": 2914
|
|
},
|
|
{
|
|
"epoch": 1.5309873949579833,
|
|
"grad_norm": 16.3322604680947,
|
|
"learning_rate": 5.694520464490365e-06,
|
|
"loss": 1.4657001495361328,
|
|
"step": 2915
|
|
},
|
|
{
|
|
"epoch": 1.5315126050420167,
|
|
"grad_norm": 17.006787249171897,
|
|
"learning_rate": 5.6914939365615e-06,
|
|
"loss": 1.237950325012207,
|
|
"step": 2916
|
|
},
|
|
{
|
|
"epoch": 1.5320378151260505,
|
|
"grad_norm": 13.491224137589745,
|
|
"learning_rate": 5.688467150310353e-06,
|
|
"loss": 1.5527464151382446,
|
|
"step": 2917
|
|
},
|
|
{
|
|
"epoch": 1.532563025210084,
|
|
"grad_norm": 10.91222177976933,
|
|
"learning_rate": 5.685440106867642e-06,
|
|
"loss": 1.6397098302841187,
|
|
"step": 2918
|
|
},
|
|
{
|
|
"epoch": 1.5330882352941178,
|
|
"grad_norm": 10.607586571968769,
|
|
"learning_rate": 5.682412807364187e-06,
|
|
"loss": 2.186699390411377,
|
|
"step": 2919
|
|
},
|
|
{
|
|
"epoch": 1.5336134453781511,
|
|
"grad_norm": 7.1677369047294235,
|
|
"learning_rate": 5.6793852529308965e-06,
|
|
"loss": 1.648890733718872,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 1.534138655462185,
|
|
"grad_norm": 12.859974108533141,
|
|
"learning_rate": 5.67635744469878e-06,
|
|
"loss": 1.0208791494369507,
|
|
"step": 2921
|
|
},
|
|
{
|
|
"epoch": 1.5346638655462184,
|
|
"grad_norm": 9.512064502130299,
|
|
"learning_rate": 5.67332938379894e-06,
|
|
"loss": 1.5499924421310425,
|
|
"step": 2922
|
|
},
|
|
{
|
|
"epoch": 1.5351890756302522,
|
|
"grad_norm": 13.143085652501446,
|
|
"learning_rate": 5.6703010713625715e-06,
|
|
"loss": 1.8655292987823486,
|
|
"step": 2923
|
|
},
|
|
{
|
|
"epoch": 1.5357142857142856,
|
|
"grad_norm": 9.426891918390485,
|
|
"learning_rate": 5.667272508520968e-06,
|
|
"loss": 1.5644574165344238,
|
|
"step": 2924
|
|
},
|
|
{
|
|
"epoch": 1.5362394957983194,
|
|
"grad_norm": 17.52336736081476,
|
|
"learning_rate": 5.664243696405509e-06,
|
|
"loss": 2.126572608947754,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 1.5367647058823528,
|
|
"grad_norm": 12.395544361341729,
|
|
"learning_rate": 5.661214636147676e-06,
|
|
"loss": 1.1755940914154053,
|
|
"step": 2926
|
|
},
|
|
{
|
|
"epoch": 1.5372899159663866,
|
|
"grad_norm": 9.61854386385847,
|
|
"learning_rate": 5.658185328879037e-06,
|
|
"loss": 1.557518482208252,
|
|
"step": 2927
|
|
},
|
|
{
|
|
"epoch": 1.53781512605042,
|
|
"grad_norm": 9.378644950617781,
|
|
"learning_rate": 5.6551557757312536e-06,
|
|
"loss": 1.422531008720398,
|
|
"step": 2928
|
|
},
|
|
{
|
|
"epoch": 1.5383403361344539,
|
|
"grad_norm": 11.002887083092874,
|
|
"learning_rate": 5.652125977836083e-06,
|
|
"loss": 1.052585244178772,
|
|
"step": 2929
|
|
},
|
|
{
|
|
"epoch": 1.5388655462184873,
|
|
"grad_norm": 8.084211781317082,
|
|
"learning_rate": 5.649095936325367e-06,
|
|
"loss": 1.8470664024353027,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 1.539390756302521,
|
|
"grad_norm": 9.200810971099617,
|
|
"learning_rate": 5.646065652331045e-06,
|
|
"loss": 2.062453269958496,
|
|
"step": 2931
|
|
},
|
|
{
|
|
"epoch": 1.5399159663865545,
|
|
"grad_norm": 17.16929349192554,
|
|
"learning_rate": 5.643035126985141e-06,
|
|
"loss": 1.3507643938064575,
|
|
"step": 2932
|
|
},
|
|
{
|
|
"epoch": 1.5404411764705883,
|
|
"grad_norm": 9.172600802244732,
|
|
"learning_rate": 5.640004361419776e-06,
|
|
"loss": 1.5793935060501099,
|
|
"step": 2933
|
|
},
|
|
{
|
|
"epoch": 1.5409663865546217,
|
|
"grad_norm": 12.898758593657767,
|
|
"learning_rate": 5.636973356767155e-06,
|
|
"loss": 1.768698811531067,
|
|
"step": 2934
|
|
},
|
|
{
|
|
"epoch": 1.5414915966386555,
|
|
"grad_norm": 7.5890227638448575,
|
|
"learning_rate": 5.633942114159574e-06,
|
|
"loss": 1.1846299171447754,
|
|
"step": 2935
|
|
},
|
|
{
|
|
"epoch": 1.542016806722689,
|
|
"grad_norm": 10.259227153472677,
|
|
"learning_rate": 5.630910634729418e-06,
|
|
"loss": 0.9566595554351807,
|
|
"step": 2936
|
|
},
|
|
{
|
|
"epoch": 1.5425420168067228,
|
|
"grad_norm": 13.925142766796322,
|
|
"learning_rate": 5.627878919609162e-06,
|
|
"loss": 0.9908967018127441,
|
|
"step": 2937
|
|
},
|
|
{
|
|
"epoch": 1.5430672268907561,
|
|
"grad_norm": 13.667068248895111,
|
|
"learning_rate": 5.6248469699313664e-06,
|
|
"loss": 1.5293811559677124,
|
|
"step": 2938
|
|
},
|
|
{
|
|
"epoch": 1.54359243697479,
|
|
"grad_norm": 11.894972075141927,
|
|
"learning_rate": 5.621814786828683e-06,
|
|
"loss": 1.386354684829712,
|
|
"step": 2939
|
|
},
|
|
{
|
|
"epoch": 1.5441176470588234,
|
|
"grad_norm": 7.098368154191767,
|
|
"learning_rate": 5.618782371433844e-06,
|
|
"loss": 1.5454857349395752,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 1.5446428571428572,
|
|
"grad_norm": 13.395432727163477,
|
|
"learning_rate": 5.615749724879677e-06,
|
|
"loss": 1.8663475513458252,
|
|
"step": 2941
|
|
},
|
|
{
|
|
"epoch": 1.5451680672268906,
|
|
"grad_norm": 8.472009180366035,
|
|
"learning_rate": 5.6127168482990905e-06,
|
|
"loss": 1.6517784595489502,
|
|
"step": 2942
|
|
},
|
|
{
|
|
"epoch": 1.5456932773109244,
|
|
"grad_norm": 13.967163618889897,
|
|
"learning_rate": 5.609683742825078e-06,
|
|
"loss": 1.6994513273239136,
|
|
"step": 2943
|
|
},
|
|
{
|
|
"epoch": 1.5462184873949578,
|
|
"grad_norm": 9.92545576421003,
|
|
"learning_rate": 5.6066504095907225e-06,
|
|
"loss": 1.5950546264648438,
|
|
"step": 2944
|
|
},
|
|
{
|
|
"epoch": 1.5467436974789917,
|
|
"grad_norm": 12.846927435632132,
|
|
"learning_rate": 5.603616849729191e-06,
|
|
"loss": 1.1596665382385254,
|
|
"step": 2945
|
|
},
|
|
{
|
|
"epoch": 1.5472689075630253,
|
|
"grad_norm": 14.860186674245476,
|
|
"learning_rate": 5.600583064373733e-06,
|
|
"loss": 1.8125252723693848,
|
|
"step": 2946
|
|
},
|
|
{
|
|
"epoch": 1.5477941176470589,
|
|
"grad_norm": 17.166450175533903,
|
|
"learning_rate": 5.5975490546576834e-06,
|
|
"loss": 1.009368658065796,
|
|
"step": 2947
|
|
},
|
|
{
|
|
"epoch": 1.5483193277310925,
|
|
"grad_norm": 13.64043369742361,
|
|
"learning_rate": 5.594514821714462e-06,
|
|
"loss": 1.7966917753219604,
|
|
"step": 2948
|
|
},
|
|
{
|
|
"epoch": 1.548844537815126,
|
|
"grad_norm": 10.492051004075568,
|
|
"learning_rate": 5.591480366677571e-06,
|
|
"loss": 1.428999423980713,
|
|
"step": 2949
|
|
},
|
|
{
|
|
"epoch": 1.5493697478991597,
|
|
"grad_norm": 13.030384864945013,
|
|
"learning_rate": 5.588445690680596e-06,
|
|
"loss": 1.6084507703781128,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 1.5498949579831933,
|
|
"grad_norm": 10.52587210445605,
|
|
"learning_rate": 5.585410794857203e-06,
|
|
"loss": 1.3045061826705933,
|
|
"step": 2951
|
|
},
|
|
{
|
|
"epoch": 1.550420168067227,
|
|
"grad_norm": 13.819258830455226,
|
|
"learning_rate": 5.582375680341144e-06,
|
|
"loss": 1.658186435699463,
|
|
"step": 2952
|
|
},
|
|
{
|
|
"epoch": 1.5509453781512605,
|
|
"grad_norm": 14.27016281304172,
|
|
"learning_rate": 5.579340348266251e-06,
|
|
"loss": 1.7536265850067139,
|
|
"step": 2953
|
|
},
|
|
{
|
|
"epoch": 1.5514705882352942,
|
|
"grad_norm": 15.953136280416974,
|
|
"learning_rate": 5.576304799766436e-06,
|
|
"loss": 2.103753089904785,
|
|
"step": 2954
|
|
},
|
|
{
|
|
"epoch": 1.5519957983193278,
|
|
"grad_norm": 11.989180422717773,
|
|
"learning_rate": 5.5732690359756906e-06,
|
|
"loss": 1.8845781087875366,
|
|
"step": 2955
|
|
},
|
|
{
|
|
"epoch": 1.5525210084033614,
|
|
"grad_norm": 10.435327143905482,
|
|
"learning_rate": 5.570233058028092e-06,
|
|
"loss": 2.2095136642456055,
|
|
"step": 2956
|
|
},
|
|
{
|
|
"epoch": 1.553046218487395,
|
|
"grad_norm": 18.792925731063573,
|
|
"learning_rate": 5.5671968670577935e-06,
|
|
"loss": 1.4067503213882446,
|
|
"step": 2957
|
|
},
|
|
{
|
|
"epoch": 1.5535714285714286,
|
|
"grad_norm": 14.986631022612844,
|
|
"learning_rate": 5.564160464199029e-06,
|
|
"loss": 1.8566360473632812,
|
|
"step": 2958
|
|
},
|
|
{
|
|
"epoch": 1.5540966386554622,
|
|
"grad_norm": 10.1765285542977,
|
|
"learning_rate": 5.5611238505861094e-06,
|
|
"loss": 1.1972577571868896,
|
|
"step": 2959
|
|
},
|
|
{
|
|
"epoch": 1.5546218487394958,
|
|
"grad_norm": 9.902480520159905,
|
|
"learning_rate": 5.55808702735343e-06,
|
|
"loss": 1.118326187133789,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 1.5551470588235294,
|
|
"grad_norm": 9.29939364842904,
|
|
"learning_rate": 5.55504999563546e-06,
|
|
"loss": 1.4431086778640747,
|
|
"step": 2961
|
|
},
|
|
{
|
|
"epoch": 1.555672268907563,
|
|
"grad_norm": 13.345996711773882,
|
|
"learning_rate": 5.552012756566745e-06,
|
|
"loss": 0.9997822642326355,
|
|
"step": 2962
|
|
},
|
|
{
|
|
"epoch": 1.5561974789915967,
|
|
"grad_norm": 9.198928087670938,
|
|
"learning_rate": 5.548975311281911e-06,
|
|
"loss": 1.9770004749298096,
|
|
"step": 2963
|
|
},
|
|
{
|
|
"epoch": 1.5567226890756303,
|
|
"grad_norm": 7.7037085052309,
|
|
"learning_rate": 5.5459376609156625e-06,
|
|
"loss": 1.7865469455718994,
|
|
"step": 2964
|
|
},
|
|
{
|
|
"epoch": 1.5572478991596639,
|
|
"grad_norm": 16.600075656757618,
|
|
"learning_rate": 5.542899806602776e-06,
|
|
"loss": 1.614283800125122,
|
|
"step": 2965
|
|
},
|
|
{
|
|
"epoch": 1.5577731092436975,
|
|
"grad_norm": 13.040205367211637,
|
|
"learning_rate": 5.539861749478107e-06,
|
|
"loss": 1.5482186079025269,
|
|
"step": 2966
|
|
},
|
|
{
|
|
"epoch": 1.558298319327731,
|
|
"grad_norm": 11.213426120437903,
|
|
"learning_rate": 5.5368234906765874e-06,
|
|
"loss": 1.320069432258606,
|
|
"step": 2967
|
|
},
|
|
{
|
|
"epoch": 1.5588235294117647,
|
|
"grad_norm": 7.343748682897271,
|
|
"learning_rate": 5.533785031333224e-06,
|
|
"loss": 1.4869799613952637,
|
|
"step": 2968
|
|
},
|
|
{
|
|
"epoch": 1.5593487394957983,
|
|
"grad_norm": 8.219843277676649,
|
|
"learning_rate": 5.530746372583097e-06,
|
|
"loss": 1.561574935913086,
|
|
"step": 2969
|
|
},
|
|
{
|
|
"epoch": 1.559873949579832,
|
|
"grad_norm": 10.486421831260104,
|
|
"learning_rate": 5.52770751556136e-06,
|
|
"loss": 1.6030337810516357,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 1.5603991596638656,
|
|
"grad_norm": 7.2914544240777905,
|
|
"learning_rate": 5.524668461403247e-06,
|
|
"loss": 1.5702407360076904,
|
|
"step": 2971
|
|
},
|
|
{
|
|
"epoch": 1.5609243697478992,
|
|
"grad_norm": 9.967087925098134,
|
|
"learning_rate": 5.521629211244058e-06,
|
|
"loss": 1.2014076709747314,
|
|
"step": 2972
|
|
},
|
|
{
|
|
"epoch": 1.5614495798319328,
|
|
"grad_norm": 58.633708588583836,
|
|
"learning_rate": 5.518589766219173e-06,
|
|
"loss": 4.672181606292725,
|
|
"step": 2973
|
|
},
|
|
{
|
|
"epoch": 1.5619747899159664,
|
|
"grad_norm": 12.196714902900535,
|
|
"learning_rate": 5.515550127464035e-06,
|
|
"loss": 1.6894347667694092,
|
|
"step": 2974
|
|
},
|
|
{
|
|
"epoch": 1.5625,
|
|
"grad_norm": 8.684063387615494,
|
|
"learning_rate": 5.512510296114174e-06,
|
|
"loss": 1.867067575454712,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 1.5630252100840336,
|
|
"grad_norm": 10.141827754464805,
|
|
"learning_rate": 5.509470273305179e-06,
|
|
"loss": 2.030630111694336,
|
|
"step": 2976
|
|
},
|
|
{
|
|
"epoch": 1.5635504201680672,
|
|
"grad_norm": 20.41224438593559,
|
|
"learning_rate": 5.506430060172714e-06,
|
|
"loss": 1.8465218544006348,
|
|
"step": 2977
|
|
},
|
|
{
|
|
"epoch": 1.5640756302521008,
|
|
"grad_norm": 9.860469747935166,
|
|
"learning_rate": 5.503389657852519e-06,
|
|
"loss": 2.35500431060791,
|
|
"step": 2978
|
|
},
|
|
{
|
|
"epoch": 1.5646008403361344,
|
|
"grad_norm": 9.985572325872047,
|
|
"learning_rate": 5.5003490674804e-06,
|
|
"loss": 1.1961085796356201,
|
|
"step": 2979
|
|
},
|
|
{
|
|
"epoch": 1.565126050420168,
|
|
"grad_norm": 16.36883349113698,
|
|
"learning_rate": 5.4973082901922325e-06,
|
|
"loss": 1.2019355297088623,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 1.5656512605042017,
|
|
"grad_norm": 8.020900276544133,
|
|
"learning_rate": 5.494267327123965e-06,
|
|
"loss": 1.6488561630249023,
|
|
"step": 2981
|
|
},
|
|
{
|
|
"epoch": 1.5661764705882353,
|
|
"grad_norm": 9.849217556069373,
|
|
"learning_rate": 5.491226179411614e-06,
|
|
"loss": 1.1081463098526,
|
|
"step": 2982
|
|
},
|
|
{
|
|
"epoch": 1.566701680672269,
|
|
"grad_norm": 8.847790649420688,
|
|
"learning_rate": 5.488184848191265e-06,
|
|
"loss": 1.8693208694458008,
|
|
"step": 2983
|
|
},
|
|
{
|
|
"epoch": 1.5672268907563025,
|
|
"grad_norm": 11.0020320932377,
|
|
"learning_rate": 5.485143334599071e-06,
|
|
"loss": 1.8414572477340698,
|
|
"step": 2984
|
|
},
|
|
{
|
|
"epoch": 1.5677521008403361,
|
|
"grad_norm": 8.230947119571999,
|
|
"learning_rate": 5.482101639771255e-06,
|
|
"loss": 1.2785500288009644,
|
|
"step": 2985
|
|
},
|
|
{
|
|
"epoch": 1.5682773109243697,
|
|
"grad_norm": 10.201231107069894,
|
|
"learning_rate": 5.479059764844107e-06,
|
|
"loss": 1.2749035358428955,
|
|
"step": 2986
|
|
},
|
|
{
|
|
"epoch": 1.5688025210084033,
|
|
"grad_norm": 13.983397280782574,
|
|
"learning_rate": 5.476017710953983e-06,
|
|
"loss": 1.3269745111465454,
|
|
"step": 2987
|
|
},
|
|
{
|
|
"epoch": 1.569327731092437,
|
|
"grad_norm": 13.733439163295257,
|
|
"learning_rate": 5.4729754792373094e-06,
|
|
"loss": 1.130143642425537,
|
|
"step": 2988
|
|
},
|
|
{
|
|
"epoch": 1.5698529411764706,
|
|
"grad_norm": 11.45075614439256,
|
|
"learning_rate": 5.469933070830574e-06,
|
|
"loss": 1.440808653831482,
|
|
"step": 2989
|
|
},
|
|
{
|
|
"epoch": 1.5703781512605042,
|
|
"grad_norm": 9.24461823293225,
|
|
"learning_rate": 5.466890486870335e-06,
|
|
"loss": 1.6493146419525146,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 1.5709033613445378,
|
|
"grad_norm": 9.325880222470737,
|
|
"learning_rate": 5.463847728493214e-06,
|
|
"loss": 1.273420810699463,
|
|
"step": 2991
|
|
},
|
|
{
|
|
"epoch": 1.5714285714285714,
|
|
"grad_norm": 10.582311245428702,
|
|
"learning_rate": 5.4608047968358965e-06,
|
|
"loss": 1.924250841140747,
|
|
"step": 2992
|
|
},
|
|
{
|
|
"epoch": 1.571953781512605,
|
|
"grad_norm": 11.29854702128637,
|
|
"learning_rate": 5.457761693035139e-06,
|
|
"loss": 1.616943597793579,
|
|
"step": 2993
|
|
},
|
|
{
|
|
"epoch": 1.5724789915966386,
|
|
"grad_norm": 13.24705526533712,
|
|
"learning_rate": 5.454718418227752e-06,
|
|
"loss": 1.4549560546875,
|
|
"step": 2994
|
|
},
|
|
{
|
|
"epoch": 1.5730042016806722,
|
|
"grad_norm": 16.67667661305697,
|
|
"learning_rate": 5.451674973550619e-06,
|
|
"loss": 1.9000680446624756,
|
|
"step": 2995
|
|
},
|
|
{
|
|
"epoch": 1.5735294117647058,
|
|
"grad_norm": 14.461185745097746,
|
|
"learning_rate": 5.448631360140683e-06,
|
|
"loss": 1.6825497150421143,
|
|
"step": 2996
|
|
},
|
|
{
|
|
"epoch": 1.5740546218487395,
|
|
"grad_norm": 9.206924388508579,
|
|
"learning_rate": 5.44558757913495e-06,
|
|
"loss": 1.6047449111938477,
|
|
"step": 2997
|
|
},
|
|
{
|
|
"epoch": 1.574579831932773,
|
|
"grad_norm": 15.300112344155286,
|
|
"learning_rate": 5.4425436316704905e-06,
|
|
"loss": 1.962047815322876,
|
|
"step": 2998
|
|
},
|
|
{
|
|
"epoch": 1.5751050420168067,
|
|
"grad_norm": 10.787994607732422,
|
|
"learning_rate": 5.439499518884433e-06,
|
|
"loss": 0.8550975322723389,
|
|
"step": 2999
|
|
},
|
|
{
|
|
"epoch": 1.5756302521008403,
|
|
"grad_norm": 12.515381428962108,
|
|
"learning_rate": 5.436455241913974e-06,
|
|
"loss": 2.231415271759033,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 1.576155462184874,
|
|
"grad_norm": 8.683070628759427,
|
|
"learning_rate": 5.433410801896366e-06,
|
|
"loss": 1.4393153190612793,
|
|
"step": 3001
|
|
},
|
|
{
|
|
"epoch": 1.5766806722689075,
|
|
"grad_norm": 22.534135031948075,
|
|
"learning_rate": 5.4303661999689265e-06,
|
|
"loss": 1.3972318172454834,
|
|
"step": 3002
|
|
},
|
|
{
|
|
"epoch": 1.5772058823529411,
|
|
"grad_norm": 13.98836802533251,
|
|
"learning_rate": 5.427321437269027e-06,
|
|
"loss": 1.539790391921997,
|
|
"step": 3003
|
|
},
|
|
{
|
|
"epoch": 1.5777310924369747,
|
|
"grad_norm": 12.131343502431259,
|
|
"learning_rate": 5.424276514934109e-06,
|
|
"loss": 1.2395825386047363,
|
|
"step": 3004
|
|
},
|
|
{
|
|
"epoch": 1.5782563025210083,
|
|
"grad_norm": 13.41694701583357,
|
|
"learning_rate": 5.4212314341016645e-06,
|
|
"loss": 1.933267593383789,
|
|
"step": 3005
|
|
},
|
|
{
|
|
"epoch": 1.5787815126050422,
|
|
"grad_norm": 12.739135132339845,
|
|
"learning_rate": 5.418186195909249e-06,
|
|
"loss": 2.522068738937378,
|
|
"step": 3006
|
|
},
|
|
{
|
|
"epoch": 1.5793067226890756,
|
|
"grad_norm": 12.596879198874607,
|
|
"learning_rate": 5.415140801494475e-06,
|
|
"loss": 0.8189308643341064,
|
|
"step": 3007
|
|
},
|
|
{
|
|
"epoch": 1.5798319327731094,
|
|
"grad_norm": 34.20284374633111,
|
|
"learning_rate": 5.412095251995017e-06,
|
|
"loss": 1.449036955833435,
|
|
"step": 3008
|
|
},
|
|
{
|
|
"epoch": 1.5803571428571428,
|
|
"grad_norm": 11.13227098032211,
|
|
"learning_rate": 5.409049548548604e-06,
|
|
"loss": 1.3301892280578613,
|
|
"step": 3009
|
|
},
|
|
{
|
|
"epoch": 1.5808823529411766,
|
|
"grad_norm": 51.155319989963644,
|
|
"learning_rate": 5.406003692293022e-06,
|
|
"loss": 1.5270702838897705,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 1.58140756302521,
|
|
"grad_norm": 10.342910703611519,
|
|
"learning_rate": 5.402957684366116e-06,
|
|
"loss": 1.5434386730194092,
|
|
"step": 3011
|
|
},
|
|
{
|
|
"epoch": 1.5819327731092439,
|
|
"grad_norm": 10.563548868112887,
|
|
"learning_rate": 5.399911525905787e-06,
|
|
"loss": 1.764906644821167,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 1.5824579831932772,
|
|
"grad_norm": 8.647277699602647,
|
|
"learning_rate": 5.396865218049995e-06,
|
|
"loss": 1.9067412614822388,
|
|
"step": 3013
|
|
},
|
|
{
|
|
"epoch": 1.582983193277311,
|
|
"grad_norm": 10.777840029312719,
|
|
"learning_rate": 5.393818761936749e-06,
|
|
"loss": 1.3426694869995117,
|
|
"step": 3014
|
|
},
|
|
{
|
|
"epoch": 1.5835084033613445,
|
|
"grad_norm": 15.586663362020502,
|
|
"learning_rate": 5.390772158704119e-06,
|
|
"loss": 1.3299593925476074,
|
|
"step": 3015
|
|
},
|
|
{
|
|
"epoch": 1.5840336134453783,
|
|
"grad_norm": 20.257025956666645,
|
|
"learning_rate": 5.387725409490231e-06,
|
|
"loss": 1.310839056968689,
|
|
"step": 3016
|
|
},
|
|
{
|
|
"epoch": 1.5845588235294117,
|
|
"grad_norm": 15.582753618919837,
|
|
"learning_rate": 5.38467851543326e-06,
|
|
"loss": 1.757559061050415,
|
|
"step": 3017
|
|
},
|
|
{
|
|
"epoch": 1.5850840336134455,
|
|
"grad_norm": 23.639658603172965,
|
|
"learning_rate": 5.381631477671439e-06,
|
|
"loss": 1.3903396129608154,
|
|
"step": 3018
|
|
},
|
|
{
|
|
"epoch": 1.585609243697479,
|
|
"grad_norm": 9.434937981744953,
|
|
"learning_rate": 5.378584297343053e-06,
|
|
"loss": 1.4597529172897339,
|
|
"step": 3019
|
|
},
|
|
{
|
|
"epoch": 1.5861344537815127,
|
|
"grad_norm": 17.80952656483005,
|
|
"learning_rate": 5.375536975586444e-06,
|
|
"loss": 2.0107779502868652,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 1.5866596638655461,
|
|
"grad_norm": 9.516485496132065,
|
|
"learning_rate": 5.3724895135400015e-06,
|
|
"loss": 1.5562787055969238,
|
|
"step": 3021
|
|
},
|
|
{
|
|
"epoch": 1.58718487394958,
|
|
"grad_norm": 9.009365409721646,
|
|
"learning_rate": 5.369441912342169e-06,
|
|
"loss": 1.2896162271499634,
|
|
"step": 3022
|
|
},
|
|
{
|
|
"epoch": 1.5877100840336134,
|
|
"grad_norm": 8.809121455676818,
|
|
"learning_rate": 5.366394173131445e-06,
|
|
"loss": 1.3252286911010742,
|
|
"step": 3023
|
|
},
|
|
{
|
|
"epoch": 1.5882352941176472,
|
|
"grad_norm": 15.278197336749606,
|
|
"learning_rate": 5.363346297046376e-06,
|
|
"loss": 0.9057502150535583,
|
|
"step": 3024
|
|
},
|
|
{
|
|
"epoch": 1.5887605042016806,
|
|
"grad_norm": 10.265475431681976,
|
|
"learning_rate": 5.360298285225564e-06,
|
|
"loss": 1.4344761371612549,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 1.5892857142857144,
|
|
"grad_norm": 8.980900818541313,
|
|
"learning_rate": 5.357250138807652e-06,
|
|
"loss": 1.5517914295196533,
|
|
"step": 3026
|
|
},
|
|
{
|
|
"epoch": 1.5898109243697478,
|
|
"grad_norm": 9.865844630288834,
|
|
"learning_rate": 5.354201858931348e-06,
|
|
"loss": 1.2274580001831055,
|
|
"step": 3027
|
|
},
|
|
{
|
|
"epoch": 1.5903361344537816,
|
|
"grad_norm": 17.581322571225986,
|
|
"learning_rate": 5.351153446735398e-06,
|
|
"loss": 2.2450833320617676,
|
|
"step": 3028
|
|
},
|
|
{
|
|
"epoch": 1.590861344537815,
|
|
"grad_norm": 10.983447659569535,
|
|
"learning_rate": 5.3481049033586e-06,
|
|
"loss": 1.5925889015197754,
|
|
"step": 3029
|
|
},
|
|
{
|
|
"epoch": 1.5913865546218489,
|
|
"grad_norm": 11.374823923313846,
|
|
"learning_rate": 5.345056229939802e-06,
|
|
"loss": 2.034247636795044,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 1.5919117647058822,
|
|
"grad_norm": 9.395067091317914,
|
|
"learning_rate": 5.342007427617906e-06,
|
|
"loss": 1.5270767211914062,
|
|
"step": 3031
|
|
},
|
|
{
|
|
"epoch": 1.592436974789916,
|
|
"grad_norm": 10.107363521608857,
|
|
"learning_rate": 5.338958497531852e-06,
|
|
"loss": 1.012595772743225,
|
|
"step": 3032
|
|
},
|
|
{
|
|
"epoch": 1.5929621848739495,
|
|
"grad_norm": 10.518752553859517,
|
|
"learning_rate": 5.335909440820635e-06,
|
|
"loss": 1.5243604183197021,
|
|
"step": 3033
|
|
},
|
|
{
|
|
"epoch": 1.5934873949579833,
|
|
"grad_norm": 9.787692364746867,
|
|
"learning_rate": 5.332860258623292e-06,
|
|
"loss": 0.798157811164856,
|
|
"step": 3034
|
|
},
|
|
{
|
|
"epoch": 1.5940126050420167,
|
|
"grad_norm": 7.431465451336366,
|
|
"learning_rate": 5.329810952078914e-06,
|
|
"loss": 1.5136232376098633,
|
|
"step": 3035
|
|
},
|
|
{
|
|
"epoch": 1.5945378151260505,
|
|
"grad_norm": 11.535268048562655,
|
|
"learning_rate": 5.326761522326633e-06,
|
|
"loss": 0.9737481474876404,
|
|
"step": 3036
|
|
},
|
|
{
|
|
"epoch": 1.595063025210084,
|
|
"grad_norm": 12.974696635069863,
|
|
"learning_rate": 5.323711970505627e-06,
|
|
"loss": 1.685730218887329,
|
|
"step": 3037
|
|
},
|
|
{
|
|
"epoch": 1.5955882352941178,
|
|
"grad_norm": 14.400191708556283,
|
|
"learning_rate": 5.320662297755123e-06,
|
|
"loss": 1.4592320919036865,
|
|
"step": 3038
|
|
},
|
|
{
|
|
"epoch": 1.5961134453781511,
|
|
"grad_norm": 10.848519871961543,
|
|
"learning_rate": 5.3176125052143905e-06,
|
|
"loss": 1.5373382568359375,
|
|
"step": 3039
|
|
},
|
|
{
|
|
"epoch": 1.596638655462185,
|
|
"grad_norm": 12.23179320445541,
|
|
"learning_rate": 5.314562594022744e-06,
|
|
"loss": 2.0527639389038086,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 1.5971638655462184,
|
|
"grad_norm": 11.62823977967023,
|
|
"learning_rate": 5.311512565319542e-06,
|
|
"loss": 1.5563435554504395,
|
|
"step": 3041
|
|
},
|
|
{
|
|
"epoch": 1.5976890756302522,
|
|
"grad_norm": 6.9020257481060625,
|
|
"learning_rate": 5.308462420244189e-06,
|
|
"loss": 1.3120524883270264,
|
|
"step": 3042
|
|
},
|
|
{
|
|
"epoch": 1.5982142857142856,
|
|
"grad_norm": 11.1950524441662,
|
|
"learning_rate": 5.305412159936133e-06,
|
|
"loss": 1.188920259475708,
|
|
"step": 3043
|
|
},
|
|
{
|
|
"epoch": 1.5987394957983194,
|
|
"grad_norm": 13.860457797728966,
|
|
"learning_rate": 5.302361785534861e-06,
|
|
"loss": 1.412747859954834,
|
|
"step": 3044
|
|
},
|
|
{
|
|
"epoch": 1.5992647058823528,
|
|
"grad_norm": 8.955326254548957,
|
|
"learning_rate": 5.299311298179904e-06,
|
|
"loss": 1.6942559480667114,
|
|
"step": 3045
|
|
},
|
|
{
|
|
"epoch": 1.5997899159663866,
|
|
"grad_norm": 15.0471789601037,
|
|
"learning_rate": 5.2962606990108415e-06,
|
|
"loss": 1.9774577617645264,
|
|
"step": 3046
|
|
},
|
|
{
|
|
"epoch": 1.60031512605042,
|
|
"grad_norm": 18.8562441769276,
|
|
"learning_rate": 5.293209989167286e-06,
|
|
"loss": 1.4774854183197021,
|
|
"step": 3047
|
|
},
|
|
{
|
|
"epoch": 1.6008403361344539,
|
|
"grad_norm": 7.800871805592767,
|
|
"learning_rate": 5.290159169788895e-06,
|
|
"loss": 1.2230358123779297,
|
|
"step": 3048
|
|
},
|
|
{
|
|
"epoch": 1.6013655462184873,
|
|
"grad_norm": 10.408727190375572,
|
|
"learning_rate": 5.287108242015371e-06,
|
|
"loss": 1.5123305320739746,
|
|
"step": 3049
|
|
},
|
|
{
|
|
"epoch": 1.601890756302521,
|
|
"grad_norm": 10.773882300648125,
|
|
"learning_rate": 5.284057206986449e-06,
|
|
"loss": 1.538888692855835,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 1.6024159663865545,
|
|
"grad_norm": 12.521891812571724,
|
|
"learning_rate": 5.2810060658419095e-06,
|
|
"loss": 1.007758378982544,
|
|
"step": 3051
|
|
},
|
|
{
|
|
"epoch": 1.6029411764705883,
|
|
"grad_norm": 12.84736092836692,
|
|
"learning_rate": 5.277954819721569e-06,
|
|
"loss": 1.2584798336029053,
|
|
"step": 3052
|
|
},
|
|
{
|
|
"epoch": 1.6034663865546217,
|
|
"grad_norm": 20.570205363136893,
|
|
"learning_rate": 5.27490346976529e-06,
|
|
"loss": 1.558570146560669,
|
|
"step": 3053
|
|
},
|
|
{
|
|
"epoch": 1.6039915966386555,
|
|
"grad_norm": 8.336119242439366,
|
|
"learning_rate": 5.2718520171129664e-06,
|
|
"loss": 1.7295467853546143,
|
|
"step": 3054
|
|
},
|
|
{
|
|
"epoch": 1.604516806722689,
|
|
"grad_norm": 20.642779786925278,
|
|
"learning_rate": 5.268800462904533e-06,
|
|
"loss": 1.83451247215271,
|
|
"step": 3055
|
|
},
|
|
{
|
|
"epoch": 1.6050420168067228,
|
|
"grad_norm": 12.38385381998978,
|
|
"learning_rate": 5.265748808279963e-06,
|
|
"loss": 2.1253252029418945,
|
|
"step": 3056
|
|
},
|
|
{
|
|
"epoch": 1.6055672268907561,
|
|
"grad_norm": 18.712846899400745,
|
|
"learning_rate": 5.2626970543792685e-06,
|
|
"loss": 1.9685165882110596,
|
|
"step": 3057
|
|
},
|
|
{
|
|
"epoch": 1.60609243697479,
|
|
"grad_norm": 11.370481775078325,
|
|
"learning_rate": 5.259645202342496e-06,
|
|
"loss": 1.7771251201629639,
|
|
"step": 3058
|
|
},
|
|
{
|
|
"epoch": 1.6066176470588234,
|
|
"grad_norm": 19.03876477951974,
|
|
"learning_rate": 5.256593253309728e-06,
|
|
"loss": 1.5930149555206299,
|
|
"step": 3059
|
|
},
|
|
{
|
|
"epoch": 1.6071428571428572,
|
|
"grad_norm": 10.254101681931608,
|
|
"learning_rate": 5.25354120842109e-06,
|
|
"loss": 1.1894960403442383,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 1.6076680672268906,
|
|
"grad_norm": 10.653231763764932,
|
|
"learning_rate": 5.250489068816734e-06,
|
|
"loss": 1.458434820175171,
|
|
"step": 3061
|
|
},
|
|
{
|
|
"epoch": 1.6081932773109244,
|
|
"grad_norm": 10.829776918269657,
|
|
"learning_rate": 5.247436835636853e-06,
|
|
"loss": 0.9005193710327148,
|
|
"step": 3062
|
|
},
|
|
{
|
|
"epoch": 1.6087184873949578,
|
|
"grad_norm": 10.326349390118326,
|
|
"learning_rate": 5.244384510021673e-06,
|
|
"loss": 1.6701995134353638,
|
|
"step": 3063
|
|
},
|
|
{
|
|
"epoch": 1.6092436974789917,
|
|
"grad_norm": 13.18199371129073,
|
|
"learning_rate": 5.241332093111457e-06,
|
|
"loss": 1.5033565759658813,
|
|
"step": 3064
|
|
},
|
|
{
|
|
"epoch": 1.6097689075630253,
|
|
"grad_norm": 9.466469794566347,
|
|
"learning_rate": 5.238279586046499e-06,
|
|
"loss": 1.618055820465088,
|
|
"step": 3065
|
|
},
|
|
{
|
|
"epoch": 1.6102941176470589,
|
|
"grad_norm": 7.960384961155846,
|
|
"learning_rate": 5.235226989967129e-06,
|
|
"loss": 1.7577903270721436,
|
|
"step": 3066
|
|
},
|
|
{
|
|
"epoch": 1.6108193277310925,
|
|
"grad_norm": 8.303735290895537,
|
|
"learning_rate": 5.232174306013706e-06,
|
|
"loss": 1.4577488899230957,
|
|
"step": 3067
|
|
},
|
|
{
|
|
"epoch": 1.611344537815126,
|
|
"grad_norm": 9.683235980279202,
|
|
"learning_rate": 5.2291215353266315e-06,
|
|
"loss": 1.25355863571167,
|
|
"step": 3068
|
|
},
|
|
{
|
|
"epoch": 1.6118697478991597,
|
|
"grad_norm": 10.863844807133503,
|
|
"learning_rate": 5.226068679046327e-06,
|
|
"loss": 1.4512593746185303,
|
|
"step": 3069
|
|
},
|
|
{
|
|
"epoch": 1.6123949579831933,
|
|
"grad_norm": 15.636049396995174,
|
|
"learning_rate": 5.223015738313254e-06,
|
|
"loss": 1.479020595550537,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 1.612920168067227,
|
|
"grad_norm": 12.599607237865373,
|
|
"learning_rate": 5.219962714267903e-06,
|
|
"loss": 1.4065523147583008,
|
|
"step": 3071
|
|
},
|
|
{
|
|
"epoch": 1.6134453781512605,
|
|
"grad_norm": 12.051796673506267,
|
|
"learning_rate": 5.2169096080507975e-06,
|
|
"loss": 1.7732646465301514,
|
|
"step": 3072
|
|
},
|
|
{
|
|
"epoch": 1.6139705882352942,
|
|
"grad_norm": 11.207618487420092,
|
|
"learning_rate": 5.21385642080249e-06,
|
|
"loss": 1.3979735374450684,
|
|
"step": 3073
|
|
},
|
|
{
|
|
"epoch": 1.6144957983193278,
|
|
"grad_norm": 7.1466018349447795,
|
|
"learning_rate": 5.2108031536635614e-06,
|
|
"loss": 1.436557412147522,
|
|
"step": 3074
|
|
},
|
|
{
|
|
"epoch": 1.6150210084033614,
|
|
"grad_norm": 11.01119895282242,
|
|
"learning_rate": 5.2077498077746295e-06,
|
|
"loss": 1.8917148113250732,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 1.615546218487395,
|
|
"grad_norm": 6.396958668584958,
|
|
"learning_rate": 5.204696384276332e-06,
|
|
"loss": 1.7135422229766846,
|
|
"step": 3076
|
|
},
|
|
{
|
|
"epoch": 1.6160714285714286,
|
|
"grad_norm": 11.435546743896237,
|
|
"learning_rate": 5.201642884309341e-06,
|
|
"loss": 2.1062474250793457,
|
|
"step": 3077
|
|
},
|
|
{
|
|
"epoch": 1.6165966386554622,
|
|
"grad_norm": 8.398390970244984,
|
|
"learning_rate": 5.198589309014358e-06,
|
|
"loss": 1.3851127624511719,
|
|
"step": 3078
|
|
},
|
|
{
|
|
"epoch": 1.6171218487394958,
|
|
"grad_norm": 10.678438945745349,
|
|
"learning_rate": 5.195535659532111e-06,
|
|
"loss": 2.4275312423706055,
|
|
"step": 3079
|
|
},
|
|
{
|
|
"epoch": 1.6176470588235294,
|
|
"grad_norm": 11.411212604789487,
|
|
"learning_rate": 5.192481937003354e-06,
|
|
"loss": 1.3018128871917725,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 1.618172268907563,
|
|
"grad_norm": 13.5116399134382,
|
|
"learning_rate": 5.189428142568872e-06,
|
|
"loss": 1.7556755542755127,
|
|
"step": 3081
|
|
},
|
|
{
|
|
"epoch": 1.6186974789915967,
|
|
"grad_norm": 17.603190116269715,
|
|
"learning_rate": 5.186374277369474e-06,
|
|
"loss": 1.702657699584961,
|
|
"step": 3082
|
|
},
|
|
{
|
|
"epoch": 1.6192226890756303,
|
|
"grad_norm": 10.3907493586627,
|
|
"learning_rate": 5.183320342545995e-06,
|
|
"loss": 1.9639397859573364,
|
|
"step": 3083
|
|
},
|
|
{
|
|
"epoch": 1.6197478991596639,
|
|
"grad_norm": 13.03413836769717,
|
|
"learning_rate": 5.180266339239301e-06,
|
|
"loss": 1.04927396774292,
|
|
"step": 3084
|
|
},
|
|
{
|
|
"epoch": 1.6202731092436975,
|
|
"grad_norm": 8.706697489285661,
|
|
"learning_rate": 5.177212268590277e-06,
|
|
"loss": 2.341620445251465,
|
|
"step": 3085
|
|
},
|
|
{
|
|
"epoch": 1.620798319327731,
|
|
"grad_norm": 10.577233595092286,
|
|
"learning_rate": 5.174158131739837e-06,
|
|
"loss": 1.6827950477600098,
|
|
"step": 3086
|
|
},
|
|
{
|
|
"epoch": 1.6213235294117647,
|
|
"grad_norm": 8.615706829388369,
|
|
"learning_rate": 5.171103929828919e-06,
|
|
"loss": 1.5805842876434326,
|
|
"step": 3087
|
|
},
|
|
{
|
|
"epoch": 1.6218487394957983,
|
|
"grad_norm": 7.718213532602144,
|
|
"learning_rate": 5.168049663998485e-06,
|
|
"loss": 2.093625068664551,
|
|
"step": 3088
|
|
},
|
|
{
|
|
"epoch": 1.622373949579832,
|
|
"grad_norm": 10.10945225111664,
|
|
"learning_rate": 5.16499533538952e-06,
|
|
"loss": 1.1579737663269043,
|
|
"step": 3089
|
|
},
|
|
{
|
|
"epoch": 1.6228991596638656,
|
|
"grad_norm": 8.908457376782547,
|
|
"learning_rate": 5.161940945143036e-06,
|
|
"loss": 1.6166542768478394,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 1.6234243697478992,
|
|
"grad_norm": 13.615864412530772,
|
|
"learning_rate": 5.158886494400062e-06,
|
|
"loss": 1.6508545875549316,
|
|
"step": 3091
|
|
},
|
|
{
|
|
"epoch": 1.6239495798319328,
|
|
"grad_norm": 24.439336687364076,
|
|
"learning_rate": 5.155831984301657e-06,
|
|
"loss": 1.3991830348968506,
|
|
"step": 3092
|
|
},
|
|
{
|
|
"epoch": 1.6244747899159664,
|
|
"grad_norm": 8.583808073820869,
|
|
"learning_rate": 5.152777415988894e-06,
|
|
"loss": 1.5706121921539307,
|
|
"step": 3093
|
|
},
|
|
{
|
|
"epoch": 1.625,
|
|
"grad_norm": 15.653129337077532,
|
|
"learning_rate": 5.1497227906028764e-06,
|
|
"loss": 1.4092233180999756,
|
|
"step": 3094
|
|
},
|
|
{
|
|
"epoch": 1.6255252100840336,
|
|
"grad_norm": 10.34787712355263,
|
|
"learning_rate": 5.146668109284723e-06,
|
|
"loss": 1.3636889457702637,
|
|
"step": 3095
|
|
},
|
|
{
|
|
"epoch": 1.6260504201680672,
|
|
"grad_norm": 10.599082953760776,
|
|
"learning_rate": 5.143613373175573e-06,
|
|
"loss": 2.0063517093658447,
|
|
"step": 3096
|
|
},
|
|
{
|
|
"epoch": 1.6265756302521008,
|
|
"grad_norm": 12.200199833393542,
|
|
"learning_rate": 5.140558583416591e-06,
|
|
"loss": 1.997770071029663,
|
|
"step": 3097
|
|
},
|
|
{
|
|
"epoch": 1.6271008403361344,
|
|
"grad_norm": 15.849921841994798,
|
|
"learning_rate": 5.137503741148957e-06,
|
|
"loss": 1.7975128889083862,
|
|
"step": 3098
|
|
},
|
|
{
|
|
"epoch": 1.627626050420168,
|
|
"grad_norm": 9.632688041383286,
|
|
"learning_rate": 5.134448847513873e-06,
|
|
"loss": 1.5118117332458496,
|
|
"step": 3099
|
|
},
|
|
{
|
|
"epoch": 1.6281512605042017,
|
|
"grad_norm": 9.128526242946805,
|
|
"learning_rate": 5.1313939036525585e-06,
|
|
"loss": 0.9926509857177734,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 1.6286764705882353,
|
|
"grad_norm": 12.285035917057023,
|
|
"learning_rate": 5.128338910706254e-06,
|
|
"loss": 1.096555233001709,
|
|
"step": 3101
|
|
},
|
|
{
|
|
"epoch": 1.629201680672269,
|
|
"grad_norm": 10.10042325655656,
|
|
"learning_rate": 5.125283869816218e-06,
|
|
"loss": 1.4132643938064575,
|
|
"step": 3102
|
|
},
|
|
{
|
|
"epoch": 1.6297268907563025,
|
|
"grad_norm": 24.57875669580783,
|
|
"learning_rate": 5.122228782123723e-06,
|
|
"loss": 1.9915111064910889,
|
|
"step": 3103
|
|
},
|
|
{
|
|
"epoch": 1.6302521008403361,
|
|
"grad_norm": 17.48422070876946,
|
|
"learning_rate": 5.119173648770065e-06,
|
|
"loss": 1.8281275033950806,
|
|
"step": 3104
|
|
},
|
|
{
|
|
"epoch": 1.6307773109243697,
|
|
"grad_norm": 14.307260743404967,
|
|
"learning_rate": 5.1161184708965525e-06,
|
|
"loss": 1.4649648666381836,
|
|
"step": 3105
|
|
},
|
|
{
|
|
"epoch": 1.6313025210084033,
|
|
"grad_norm": 15.466741659182713,
|
|
"learning_rate": 5.113063249644514e-06,
|
|
"loss": 1.4001120328903198,
|
|
"step": 3106
|
|
},
|
|
{
|
|
"epoch": 1.631827731092437,
|
|
"grad_norm": 9.286672187247927,
|
|
"learning_rate": 5.110007986155291e-06,
|
|
"loss": 1.1792571544647217,
|
|
"step": 3107
|
|
},
|
|
{
|
|
"epoch": 1.6323529411764706,
|
|
"grad_norm": 7.957869759941225,
|
|
"learning_rate": 5.106952681570242e-06,
|
|
"loss": 1.5156265497207642,
|
|
"step": 3108
|
|
},
|
|
{
|
|
"epoch": 1.6328781512605042,
|
|
"grad_norm": 14.367417229332117,
|
|
"learning_rate": 5.103897337030742e-06,
|
|
"loss": 1.281886339187622,
|
|
"step": 3109
|
|
},
|
|
{
|
|
"epoch": 1.6334033613445378,
|
|
"grad_norm": 11.117524175921625,
|
|
"learning_rate": 5.10084195367818e-06,
|
|
"loss": 1.5550462007522583,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 1.6339285714285714,
|
|
"grad_norm": 6.486761326314236,
|
|
"learning_rate": 5.097786532653959e-06,
|
|
"loss": 1.6664419174194336,
|
|
"step": 3111
|
|
},
|
|
{
|
|
"epoch": 1.634453781512605,
|
|
"grad_norm": 9.371319968744523,
|
|
"learning_rate": 5.094731075099496e-06,
|
|
"loss": 1.7581782341003418,
|
|
"step": 3112
|
|
},
|
|
{
|
|
"epoch": 1.6349789915966386,
|
|
"grad_norm": 9.595668596262676,
|
|
"learning_rate": 5.091675582156224e-06,
|
|
"loss": 0.9803217649459839,
|
|
"step": 3113
|
|
},
|
|
{
|
|
"epoch": 1.6355042016806722,
|
|
"grad_norm": 12.477620939681469,
|
|
"learning_rate": 5.088620054965585e-06,
|
|
"loss": 0.9094586372375488,
|
|
"step": 3114
|
|
},
|
|
{
|
|
"epoch": 1.6360294117647058,
|
|
"grad_norm": 8.432020340556203,
|
|
"learning_rate": 5.0855644946690385e-06,
|
|
"loss": 2.6701955795288086,
|
|
"step": 3115
|
|
},
|
|
{
|
|
"epoch": 1.6365546218487395,
|
|
"grad_norm": 12.544280458189542,
|
|
"learning_rate": 5.082508902408053e-06,
|
|
"loss": 3.882866144180298,
|
|
"step": 3116
|
|
},
|
|
{
|
|
"epoch": 1.637079831932773,
|
|
"grad_norm": 12.564515372602196,
|
|
"learning_rate": 5.07945327932411e-06,
|
|
"loss": 1.5386974811553955,
|
|
"step": 3117
|
|
},
|
|
{
|
|
"epoch": 1.6376050420168067,
|
|
"grad_norm": 12.879838017395324,
|
|
"learning_rate": 5.076397626558704e-06,
|
|
"loss": 2.0772528648376465,
|
|
"step": 3118
|
|
},
|
|
{
|
|
"epoch": 1.6381302521008403,
|
|
"grad_norm": 34.9462007007443,
|
|
"learning_rate": 5.073341945253336e-06,
|
|
"loss": 1.662086844444275,
|
|
"step": 3119
|
|
},
|
|
{
|
|
"epoch": 1.638655462184874,
|
|
"grad_norm": 12.641927607679888,
|
|
"learning_rate": 5.0702862365495245e-06,
|
|
"loss": 1.4871139526367188,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 1.6391806722689075,
|
|
"grad_norm": 18.879200948971203,
|
|
"learning_rate": 5.067230501588792e-06,
|
|
"loss": 1.8591716289520264,
|
|
"step": 3121
|
|
},
|
|
{
|
|
"epoch": 1.6397058823529411,
|
|
"grad_norm": 17.96823659271845,
|
|
"learning_rate": 5.0641747415126755e-06,
|
|
"loss": 1.3843923807144165,
|
|
"step": 3122
|
|
},
|
|
{
|
|
"epoch": 1.6402310924369747,
|
|
"grad_norm": 14.323540358602784,
|
|
"learning_rate": 5.061118957462716e-06,
|
|
"loss": 1.3466473817825317,
|
|
"step": 3123
|
|
},
|
|
{
|
|
"epoch": 1.6407563025210083,
|
|
"grad_norm": 18.644907363918527,
|
|
"learning_rate": 5.05806315058047e-06,
|
|
"loss": 1.204024314880371,
|
|
"step": 3124
|
|
},
|
|
{
|
|
"epoch": 1.6412815126050422,
|
|
"grad_norm": 8.132668098754044,
|
|
"learning_rate": 5.055007322007497e-06,
|
|
"loss": 1.843780755996704,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 1.6418067226890756,
|
|
"grad_norm": 13.460077018059597,
|
|
"learning_rate": 5.051951472885368e-06,
|
|
"loss": 1.2028506994247437,
|
|
"step": 3126
|
|
},
|
|
{
|
|
"epoch": 1.6423319327731094,
|
|
"grad_norm": 11.506403572417291,
|
|
"learning_rate": 5.0488956043556604e-06,
|
|
"loss": 1.1519665718078613,
|
|
"step": 3127
|
|
},
|
|
{
|
|
"epoch": 1.6428571428571428,
|
|
"grad_norm": 14.578043514916429,
|
|
"learning_rate": 5.045839717559958e-06,
|
|
"loss": 1.559743046760559,
|
|
"step": 3128
|
|
},
|
|
{
|
|
"epoch": 1.6433823529411766,
|
|
"grad_norm": 15.560723367508388,
|
|
"learning_rate": 5.0427838136398545e-06,
|
|
"loss": 1.5789740085601807,
|
|
"step": 3129
|
|
},
|
|
{
|
|
"epoch": 1.64390756302521,
|
|
"grad_norm": 11.434104869431177,
|
|
"learning_rate": 5.039727893736945e-06,
|
|
"loss": 1.1219725608825684,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 1.6444327731092439,
|
|
"grad_norm": 11.436070994270105,
|
|
"learning_rate": 5.036671958992836e-06,
|
|
"loss": 1.9839292764663696,
|
|
"step": 3131
|
|
},
|
|
{
|
|
"epoch": 1.6449579831932772,
|
|
"grad_norm": 11.96687655010819,
|
|
"learning_rate": 5.033616010549135e-06,
|
|
"loss": 1.3363629579544067,
|
|
"step": 3132
|
|
},
|
|
{
|
|
"epoch": 1.645483193277311,
|
|
"grad_norm": 12.623420160559558,
|
|
"learning_rate": 5.0305600495474586e-06,
|
|
"loss": 1.426746129989624,
|
|
"step": 3133
|
|
},
|
|
{
|
|
"epoch": 1.6460084033613445,
|
|
"grad_norm": 12.38490349651787,
|
|
"learning_rate": 5.027504077129424e-06,
|
|
"loss": 1.629065990447998,
|
|
"step": 3134
|
|
},
|
|
{
|
|
"epoch": 1.6465336134453783,
|
|
"grad_norm": 18.971260479020643,
|
|
"learning_rate": 5.0244480944366555e-06,
|
|
"loss": 1.889040470123291,
|
|
"step": 3135
|
|
},
|
|
{
|
|
"epoch": 1.6470588235294117,
|
|
"grad_norm": 8.304161013849223,
|
|
"learning_rate": 5.021392102610782e-06,
|
|
"loss": 1.562378168106079,
|
|
"step": 3136
|
|
},
|
|
{
|
|
"epoch": 1.6475840336134455,
|
|
"grad_norm": 9.3929417613621,
|
|
"learning_rate": 5.018336102793433e-06,
|
|
"loss": 1.1140542030334473,
|
|
"step": 3137
|
|
},
|
|
{
|
|
"epoch": 1.648109243697479,
|
|
"grad_norm": 8.893889734172177,
|
|
"learning_rate": 5.015280096126242e-06,
|
|
"loss": 1.2760379314422607,
|
|
"step": 3138
|
|
},
|
|
{
|
|
"epoch": 1.6486344537815127,
|
|
"grad_norm": 8.2080442981099,
|
|
"learning_rate": 5.012224083750845e-06,
|
|
"loss": 0.7374924421310425,
|
|
"step": 3139
|
|
},
|
|
{
|
|
"epoch": 1.6491596638655461,
|
|
"grad_norm": 10.475997502549836,
|
|
"learning_rate": 5.009168066808883e-06,
|
|
"loss": 1.6853827238082886,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 1.64968487394958,
|
|
"grad_norm": 10.022161747941537,
|
|
"learning_rate": 5.006112046441993e-06,
|
|
"loss": 1.3828448057174683,
|
|
"step": 3141
|
|
},
|
|
{
|
|
"epoch": 1.6502100840336134,
|
|
"grad_norm": 13.963782723901204,
|
|
"learning_rate": 5.003056023791818e-06,
|
|
"loss": 1.758962631225586,
|
|
"step": 3142
|
|
},
|
|
{
|
|
"epoch": 1.6507352941176472,
|
|
"grad_norm": 13.49899842100994,
|
|
"learning_rate": 5e-06,
|
|
"loss": 1.9127230644226074,
|
|
"step": 3143
|
|
},
|
|
{
|
|
"epoch": 1.6512605042016806,
|
|
"grad_norm": 10.142974274623832,
|
|
"learning_rate": 4.996943976208184e-06,
|
|
"loss": 1.7255250215530396,
|
|
"step": 3144
|
|
},
|
|
{
|
|
"epoch": 1.6517857142857144,
|
|
"grad_norm": 12.893091416940615,
|
|
"learning_rate": 4.993887953558008e-06,
|
|
"loss": 1.6206003427505493,
|
|
"step": 3145
|
|
},
|
|
{
|
|
"epoch": 1.6523109243697478,
|
|
"grad_norm": 8.755376382428233,
|
|
"learning_rate": 4.990831933191119e-06,
|
|
"loss": 1.5673696994781494,
|
|
"step": 3146
|
|
},
|
|
{
|
|
"epoch": 1.6528361344537816,
|
|
"grad_norm": 12.819958677420079,
|
|
"learning_rate": 4.987775916249157e-06,
|
|
"loss": 1.5775996446609497,
|
|
"step": 3147
|
|
},
|
|
{
|
|
"epoch": 1.653361344537815,
|
|
"grad_norm": 9.980497121979969,
|
|
"learning_rate": 4.98471990387376e-06,
|
|
"loss": 1.0711749792099,
|
|
"step": 3148
|
|
},
|
|
{
|
|
"epoch": 1.6538865546218489,
|
|
"grad_norm": 16.850694249864684,
|
|
"learning_rate": 4.981663897206568e-06,
|
|
"loss": 1.4993646144866943,
|
|
"step": 3149
|
|
},
|
|
{
|
|
"epoch": 1.6544117647058822,
|
|
"grad_norm": 16.7741948805016,
|
|
"learning_rate": 4.9786078973892195e-06,
|
|
"loss": 1.149838924407959,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 1.654936974789916,
|
|
"grad_norm": 12.130252797235269,
|
|
"learning_rate": 4.975551905563345e-06,
|
|
"loss": 1.384533166885376,
|
|
"step": 3151
|
|
},
|
|
{
|
|
"epoch": 1.6554621848739495,
|
|
"grad_norm": 25.324741934323857,
|
|
"learning_rate": 4.9724959228705776e-06,
|
|
"loss": 1.4310171604156494,
|
|
"step": 3152
|
|
},
|
|
{
|
|
"epoch": 1.6559873949579833,
|
|
"grad_norm": 10.197357937884343,
|
|
"learning_rate": 4.969439950452543e-06,
|
|
"loss": 1.5560858249664307,
|
|
"step": 3153
|
|
},
|
|
{
|
|
"epoch": 1.6565126050420167,
|
|
"grad_norm": 8.991127525865842,
|
|
"learning_rate": 4.966383989450866e-06,
|
|
"loss": 1.1132553815841675,
|
|
"step": 3154
|
|
},
|
|
{
|
|
"epoch": 1.6570378151260505,
|
|
"grad_norm": 12.927546827610996,
|
|
"learning_rate": 4.963328041007166e-06,
|
|
"loss": 1.6278947591781616,
|
|
"step": 3155
|
|
},
|
|
{
|
|
"epoch": 1.657563025210084,
|
|
"grad_norm": 17.68273433728424,
|
|
"learning_rate": 4.960272106263056e-06,
|
|
"loss": 1.769431471824646,
|
|
"step": 3156
|
|
},
|
|
{
|
|
"epoch": 1.6580882352941178,
|
|
"grad_norm": 9.760403079756896,
|
|
"learning_rate": 4.957216186360147e-06,
|
|
"loss": 1.55194091796875,
|
|
"step": 3157
|
|
},
|
|
{
|
|
"epoch": 1.6586134453781511,
|
|
"grad_norm": 16.400495262986592,
|
|
"learning_rate": 4.954160282440043e-06,
|
|
"loss": 2.3243887424468994,
|
|
"step": 3158
|
|
},
|
|
{
|
|
"epoch": 1.659138655462185,
|
|
"grad_norm": 12.452626182995566,
|
|
"learning_rate": 4.951104395644342e-06,
|
|
"loss": 1.3925807476043701,
|
|
"step": 3159
|
|
},
|
|
{
|
|
"epoch": 1.6596638655462184,
|
|
"grad_norm": 9.727453645049202,
|
|
"learning_rate": 4.948048527114633e-06,
|
|
"loss": 1.1422160863876343,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 1.6601890756302522,
|
|
"grad_norm": 8.757952499913177,
|
|
"learning_rate": 4.944992677992505e-06,
|
|
"loss": 1.4326893091201782,
|
|
"step": 3161
|
|
},
|
|
{
|
|
"epoch": 1.6607142857142856,
|
|
"grad_norm": 11.48282641238962,
|
|
"learning_rate": 4.941936849419532e-06,
|
|
"loss": 1.1868253946304321,
|
|
"step": 3162
|
|
},
|
|
{
|
|
"epoch": 1.6612394957983194,
|
|
"grad_norm": 17.28460228596912,
|
|
"learning_rate": 4.938881042537286e-06,
|
|
"loss": 1.201799750328064,
|
|
"step": 3163
|
|
},
|
|
{
|
|
"epoch": 1.6617647058823528,
|
|
"grad_norm": 14.028568607468157,
|
|
"learning_rate": 4.935825258487326e-06,
|
|
"loss": 1.7789372205734253,
|
|
"step": 3164
|
|
},
|
|
{
|
|
"epoch": 1.6622899159663866,
|
|
"grad_norm": 11.097269911557811,
|
|
"learning_rate": 4.932769498411209e-06,
|
|
"loss": 0.9179167151451111,
|
|
"step": 3165
|
|
},
|
|
{
|
|
"epoch": 1.66281512605042,
|
|
"grad_norm": 11.915851912764433,
|
|
"learning_rate": 4.929713763450477e-06,
|
|
"loss": 1.5717015266418457,
|
|
"step": 3166
|
|
},
|
|
{
|
|
"epoch": 1.6633403361344539,
|
|
"grad_norm": 10.38024475807239,
|
|
"learning_rate": 4.926658054746665e-06,
|
|
"loss": 1.34568190574646,
|
|
"step": 3167
|
|
},
|
|
{
|
|
"epoch": 1.6638655462184873,
|
|
"grad_norm": 9.239344551818904,
|
|
"learning_rate": 4.923602373441297e-06,
|
|
"loss": 2.074063539505005,
|
|
"step": 3168
|
|
},
|
|
{
|
|
"epoch": 1.664390756302521,
|
|
"grad_norm": 13.857889716143573,
|
|
"learning_rate": 4.9205467206758914e-06,
|
|
"loss": 1.703376054763794,
|
|
"step": 3169
|
|
},
|
|
{
|
|
"epoch": 1.6649159663865545,
|
|
"grad_norm": 10.877505036281866,
|
|
"learning_rate": 4.917491097591949e-06,
|
|
"loss": 1.6291817426681519,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 1.6654411764705883,
|
|
"grad_norm": 8.301892458791308,
|
|
"learning_rate": 4.914435505330962e-06,
|
|
"loss": 1.6869958639144897,
|
|
"step": 3171
|
|
},
|
|
{
|
|
"epoch": 1.6659663865546217,
|
|
"grad_norm": 7.734194438514324,
|
|
"learning_rate": 4.911379945034416e-06,
|
|
"loss": 1.8701092004776,
|
|
"step": 3172
|
|
},
|
|
{
|
|
"epoch": 1.6664915966386555,
|
|
"grad_norm": 14.316183760785892,
|
|
"learning_rate": 4.908324417843779e-06,
|
|
"loss": 1.4611387252807617,
|
|
"step": 3173
|
|
},
|
|
{
|
|
"epoch": 1.667016806722689,
|
|
"grad_norm": 9.62109786158016,
|
|
"learning_rate": 4.905268924900506e-06,
|
|
"loss": 2.080125331878662,
|
|
"step": 3174
|
|
},
|
|
{
|
|
"epoch": 1.6675420168067228,
|
|
"grad_norm": 14.680946727705855,
|
|
"learning_rate": 4.902213467346043e-06,
|
|
"loss": 1.9285434484481812,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 1.6680672268907561,
|
|
"grad_norm": 11.629182883064736,
|
|
"learning_rate": 4.899158046321821e-06,
|
|
"loss": 1.3539671897888184,
|
|
"step": 3176
|
|
},
|
|
{
|
|
"epoch": 1.66859243697479,
|
|
"grad_norm": 13.737142860943834,
|
|
"learning_rate": 4.896102662969259e-06,
|
|
"loss": 1.6036796569824219,
|
|
"step": 3177
|
|
},
|
|
{
|
|
"epoch": 1.6691176470588234,
|
|
"grad_norm": 10.12493423922255,
|
|
"learning_rate": 4.89304731842976e-06,
|
|
"loss": 2.090482711791992,
|
|
"step": 3178
|
|
},
|
|
{
|
|
"epoch": 1.6696428571428572,
|
|
"grad_norm": 7.235051189669831,
|
|
"learning_rate": 4.889992013844711e-06,
|
|
"loss": 1.5542412996292114,
|
|
"step": 3179
|
|
},
|
|
{
|
|
"epoch": 1.6701680672268906,
|
|
"grad_norm": 9.834356759414641,
|
|
"learning_rate": 4.886936750355487e-06,
|
|
"loss": 1.2818894386291504,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 1.6706932773109244,
|
|
"grad_norm": 13.540783886719886,
|
|
"learning_rate": 4.883881529103448e-06,
|
|
"loss": 1.5695828199386597,
|
|
"step": 3181
|
|
},
|
|
{
|
|
"epoch": 1.6712184873949578,
|
|
"grad_norm": 17.0558721728669,
|
|
"learning_rate": 4.880826351229937e-06,
|
|
"loss": 2.3164563179016113,
|
|
"step": 3182
|
|
},
|
|
{
|
|
"epoch": 1.6717436974789917,
|
|
"grad_norm": 9.226318265933307,
|
|
"learning_rate": 4.877771217876279e-06,
|
|
"loss": 2.172067403793335,
|
|
"step": 3183
|
|
},
|
|
{
|
|
"epoch": 1.6722689075630253,
|
|
"grad_norm": 6.517646082610535,
|
|
"learning_rate": 4.874716130183785e-06,
|
|
"loss": 0.8407649993896484,
|
|
"step": 3184
|
|
},
|
|
{
|
|
"epoch": 1.6727941176470589,
|
|
"grad_norm": 10.95684363009136,
|
|
"learning_rate": 4.8716610892937486e-06,
|
|
"loss": 1.159510612487793,
|
|
"step": 3185
|
|
},
|
|
{
|
|
"epoch": 1.6733193277310925,
|
|
"grad_norm": 15.441378641715309,
|
|
"learning_rate": 4.868606096347443e-06,
|
|
"loss": 1.2223591804504395,
|
|
"step": 3186
|
|
},
|
|
{
|
|
"epoch": 1.673844537815126,
|
|
"grad_norm": 10.039105001076978,
|
|
"learning_rate": 4.8655511524861295e-06,
|
|
"loss": 1.8431533575057983,
|
|
"step": 3187
|
|
},
|
|
{
|
|
"epoch": 1.6743697478991597,
|
|
"grad_norm": 14.004494334212994,
|
|
"learning_rate": 4.8624962588510456e-06,
|
|
"loss": 1.2883567810058594,
|
|
"step": 3188
|
|
},
|
|
{
|
|
"epoch": 1.6748949579831933,
|
|
"grad_norm": 9.295372095190801,
|
|
"learning_rate": 4.859441416583412e-06,
|
|
"loss": 1.9788686037063599,
|
|
"step": 3189
|
|
},
|
|
{
|
|
"epoch": 1.675420168067227,
|
|
"grad_norm": 12.096116013390292,
|
|
"learning_rate": 4.856386626824428e-06,
|
|
"loss": 1.2554341554641724,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 1.6759453781512605,
|
|
"grad_norm": 21.76745049762305,
|
|
"learning_rate": 4.8533318907152795e-06,
|
|
"loss": 1.0712530612945557,
|
|
"step": 3191
|
|
},
|
|
{
|
|
"epoch": 1.6764705882352942,
|
|
"grad_norm": 12.938479251535309,
|
|
"learning_rate": 4.850277209397125e-06,
|
|
"loss": 1.199103593826294,
|
|
"step": 3192
|
|
},
|
|
{
|
|
"epoch": 1.6769957983193278,
|
|
"grad_norm": 9.238235049662206,
|
|
"learning_rate": 4.847222584011107e-06,
|
|
"loss": 1.598036289215088,
|
|
"step": 3193
|
|
},
|
|
{
|
|
"epoch": 1.6775210084033614,
|
|
"grad_norm": 12.131559366166508,
|
|
"learning_rate": 4.8441680156983455e-06,
|
|
"loss": 1.5239869356155396,
|
|
"step": 3194
|
|
},
|
|
{
|
|
"epoch": 1.678046218487395,
|
|
"grad_norm": 8.232943246354864,
|
|
"learning_rate": 4.841113505599939e-06,
|
|
"loss": 1.785506010055542,
|
|
"step": 3195
|
|
},
|
|
{
|
|
"epoch": 1.6785714285714286,
|
|
"grad_norm": 11.917059082826425,
|
|
"learning_rate": 4.838059054856967e-06,
|
|
"loss": 1.4921265840530396,
|
|
"step": 3196
|
|
},
|
|
{
|
|
"epoch": 1.6790966386554622,
|
|
"grad_norm": 8.43131298640434,
|
|
"learning_rate": 4.8350046646104815e-06,
|
|
"loss": 2.3236143589019775,
|
|
"step": 3197
|
|
},
|
|
{
|
|
"epoch": 1.6796218487394958,
|
|
"grad_norm": 20.492115302876115,
|
|
"learning_rate": 4.831950336001518e-06,
|
|
"loss": 1.3714215755462646,
|
|
"step": 3198
|
|
},
|
|
{
|
|
"epoch": 1.6801470588235294,
|
|
"grad_norm": 9.50181582139722,
|
|
"learning_rate": 4.828896070171084e-06,
|
|
"loss": 0.9516867399215698,
|
|
"step": 3199
|
|
},
|
|
{
|
|
"epoch": 1.680672268907563,
|
|
"grad_norm": 10.560738331135111,
|
|
"learning_rate": 4.825841868260166e-06,
|
|
"loss": 1.1114282608032227,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 1.6811974789915967,
|
|
"grad_norm": 10.11621562454737,
|
|
"learning_rate": 4.8227877314097245e-06,
|
|
"loss": 2.444409132003784,
|
|
"step": 3201
|
|
},
|
|
{
|
|
"epoch": 1.6817226890756303,
|
|
"grad_norm": 12.090676048057713,
|
|
"learning_rate": 4.819733660760701e-06,
|
|
"loss": 1.4144461154937744,
|
|
"step": 3202
|
|
},
|
|
{
|
|
"epoch": 1.6822478991596639,
|
|
"grad_norm": 9.303163790082237,
|
|
"learning_rate": 4.8166796574540065e-06,
|
|
"loss": 1.714726209640503,
|
|
"step": 3203
|
|
},
|
|
{
|
|
"epoch": 1.6827731092436975,
|
|
"grad_norm": 9.100656720600508,
|
|
"learning_rate": 4.8136257226305295e-06,
|
|
"loss": 1.570493459701538,
|
|
"step": 3204
|
|
},
|
|
{
|
|
"epoch": 1.683298319327731,
|
|
"grad_norm": 10.776076519499956,
|
|
"learning_rate": 4.81057185743113e-06,
|
|
"loss": 2.1080472469329834,
|
|
"step": 3205
|
|
},
|
|
{
|
|
"epoch": 1.6838235294117647,
|
|
"grad_norm": 7.0919272657071994,
|
|
"learning_rate": 4.807518062996648e-06,
|
|
"loss": 2.0740365982055664,
|
|
"step": 3206
|
|
},
|
|
{
|
|
"epoch": 1.6843487394957983,
|
|
"grad_norm": 6.9403304143763185,
|
|
"learning_rate": 4.804464340467892e-06,
|
|
"loss": 1.694742202758789,
|
|
"step": 3207
|
|
},
|
|
{
|
|
"epoch": 1.684873949579832,
|
|
"grad_norm": 9.7811299170044,
|
|
"learning_rate": 4.801410690985643e-06,
|
|
"loss": 1.5359952449798584,
|
|
"step": 3208
|
|
},
|
|
{
|
|
"epoch": 1.6853991596638656,
|
|
"grad_norm": 7.2935403500847515,
|
|
"learning_rate": 4.798357115690661e-06,
|
|
"loss": 1.348632574081421,
|
|
"step": 3209
|
|
},
|
|
{
|
|
"epoch": 1.6859243697478992,
|
|
"grad_norm": 10.50041348163982,
|
|
"learning_rate": 4.795303615723671e-06,
|
|
"loss": 1.2157429456710815,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 1.6864495798319328,
|
|
"grad_norm": 11.848709652594424,
|
|
"learning_rate": 4.792250192225374e-06,
|
|
"loss": 0.9487016201019287,
|
|
"step": 3211
|
|
},
|
|
{
|
|
"epoch": 1.6869747899159664,
|
|
"grad_norm": 12.14284727204427,
|
|
"learning_rate": 4.789196846336439e-06,
|
|
"loss": 1.5461490154266357,
|
|
"step": 3212
|
|
},
|
|
{
|
|
"epoch": 1.6875,
|
|
"grad_norm": 8.496025704487556,
|
|
"learning_rate": 4.7861435791975124e-06,
|
|
"loss": 1.9235889911651611,
|
|
"step": 3213
|
|
},
|
|
{
|
|
"epoch": 1.6880252100840336,
|
|
"grad_norm": 14.438632669416723,
|
|
"learning_rate": 4.783090391949204e-06,
|
|
"loss": 1.9737489223480225,
|
|
"step": 3214
|
|
},
|
|
{
|
|
"epoch": 1.6885504201680672,
|
|
"grad_norm": 15.957219220291341,
|
|
"learning_rate": 4.7800372857320995e-06,
|
|
"loss": 1.7104718685150146,
|
|
"step": 3215
|
|
},
|
|
{
|
|
"epoch": 1.6890756302521008,
|
|
"grad_norm": 7.0164333325553585,
|
|
"learning_rate": 4.776984261686749e-06,
|
|
"loss": 1.6765732765197754,
|
|
"step": 3216
|
|
},
|
|
{
|
|
"epoch": 1.6896008403361344,
|
|
"grad_norm": 9.860218309864889,
|
|
"learning_rate": 4.7739313209536755e-06,
|
|
"loss": 1.4308388233184814,
|
|
"step": 3217
|
|
},
|
|
{
|
|
"epoch": 1.690126050420168,
|
|
"grad_norm": 7.395648801906977,
|
|
"learning_rate": 4.770878464673372e-06,
|
|
"loss": 1.5400488376617432,
|
|
"step": 3218
|
|
},
|
|
{
|
|
"epoch": 1.6906512605042017,
|
|
"grad_norm": 14.302049953275954,
|
|
"learning_rate": 4.767825693986295e-06,
|
|
"loss": 0.966601550579071,
|
|
"step": 3219
|
|
},
|
|
{
|
|
"epoch": 1.6911764705882353,
|
|
"grad_norm": 10.78606883215266,
|
|
"learning_rate": 4.764773010032874e-06,
|
|
"loss": 1.2765212059020996,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 1.691701680672269,
|
|
"grad_norm": 9.91042531527672,
|
|
"learning_rate": 4.761720413953503e-06,
|
|
"loss": 1.379080057144165,
|
|
"step": 3221
|
|
},
|
|
{
|
|
"epoch": 1.6922268907563025,
|
|
"grad_norm": 9.998140278256704,
|
|
"learning_rate": 4.758667906888545e-06,
|
|
"loss": 1.800308346748352,
|
|
"step": 3222
|
|
},
|
|
{
|
|
"epoch": 1.6927521008403361,
|
|
"grad_norm": 15.001657765860703,
|
|
"learning_rate": 4.755615489978328e-06,
|
|
"loss": 1.1739035844802856,
|
|
"step": 3223
|
|
},
|
|
{
|
|
"epoch": 1.6932773109243697,
|
|
"grad_norm": 11.251658744838176,
|
|
"learning_rate": 4.752563164363148e-06,
|
|
"loss": 1.0766520500183105,
|
|
"step": 3224
|
|
},
|
|
{
|
|
"epoch": 1.6938025210084033,
|
|
"grad_norm": 10.488158413312608,
|
|
"learning_rate": 4.7495109311832665e-06,
|
|
"loss": 1.6786444187164307,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 1.694327731092437,
|
|
"grad_norm": 11.222410267635443,
|
|
"learning_rate": 4.746458791578911e-06,
|
|
"loss": 1.9987932443618774,
|
|
"step": 3226
|
|
},
|
|
{
|
|
"epoch": 1.6948529411764706,
|
|
"grad_norm": 10.654387624082723,
|
|
"learning_rate": 4.743406746690272e-06,
|
|
"loss": 1.249092698097229,
|
|
"step": 3227
|
|
},
|
|
{
|
|
"epoch": 1.6953781512605042,
|
|
"grad_norm": 9.329416887100699,
|
|
"learning_rate": 4.740354797657504e-06,
|
|
"loss": 1.3510708808898926,
|
|
"step": 3228
|
|
},
|
|
{
|
|
"epoch": 1.6959033613445378,
|
|
"grad_norm": 8.588233129565467,
|
|
"learning_rate": 4.737302945620732e-06,
|
|
"loss": 1.4824929237365723,
|
|
"step": 3229
|
|
},
|
|
{
|
|
"epoch": 1.6964285714285714,
|
|
"grad_norm": 7.59823268809456,
|
|
"learning_rate": 4.7342511917200375e-06,
|
|
"loss": 1.5797340869903564,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 1.696953781512605,
|
|
"grad_norm": 11.89070608878343,
|
|
"learning_rate": 4.731199537095468e-06,
|
|
"loss": 1.2261934280395508,
|
|
"step": 3231
|
|
},
|
|
{
|
|
"epoch": 1.6974789915966386,
|
|
"grad_norm": 21.459642852780544,
|
|
"learning_rate": 4.728147982887034e-06,
|
|
"loss": 1.3860855102539062,
|
|
"step": 3232
|
|
},
|
|
{
|
|
"epoch": 1.6980042016806722,
|
|
"grad_norm": 7.461367326422217,
|
|
"learning_rate": 4.72509653023471e-06,
|
|
"loss": 1.7877720594406128,
|
|
"step": 3233
|
|
},
|
|
{
|
|
"epoch": 1.6985294117647058,
|
|
"grad_norm": 13.236189493835539,
|
|
"learning_rate": 4.722045180278431e-06,
|
|
"loss": 2.2877883911132812,
|
|
"step": 3234
|
|
},
|
|
{
|
|
"epoch": 1.6990546218487395,
|
|
"grad_norm": 8.133082883032813,
|
|
"learning_rate": 4.718993934158092e-06,
|
|
"loss": 1.3185124397277832,
|
|
"step": 3235
|
|
},
|
|
{
|
|
"epoch": 1.699579831932773,
|
|
"grad_norm": 9.230899339281175,
|
|
"learning_rate": 4.715942793013551e-06,
|
|
"loss": 1.5218816995620728,
|
|
"step": 3236
|
|
},
|
|
{
|
|
"epoch": 1.7001050420168067,
|
|
"grad_norm": 11.986139333558564,
|
|
"learning_rate": 4.712891757984629e-06,
|
|
"loss": 1.2758816480636597,
|
|
"step": 3237
|
|
},
|
|
{
|
|
"epoch": 1.7006302521008403,
|
|
"grad_norm": 8.2272559909767,
|
|
"learning_rate": 4.709840830211105e-06,
|
|
"loss": 1.7246310710906982,
|
|
"step": 3238
|
|
},
|
|
{
|
|
"epoch": 1.701155462184874,
|
|
"grad_norm": 8.356415866718445,
|
|
"learning_rate": 4.706790010832714e-06,
|
|
"loss": 1.6995426416397095,
|
|
"step": 3239
|
|
},
|
|
{
|
|
"epoch": 1.7016806722689075,
|
|
"grad_norm": 12.47543946791477,
|
|
"learning_rate": 4.703739300989159e-06,
|
|
"loss": 1.4347538948059082,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 1.7022058823529411,
|
|
"grad_norm": 13.775898806027204,
|
|
"learning_rate": 4.700688701820096e-06,
|
|
"loss": 2.091193914413452,
|
|
"step": 3241
|
|
},
|
|
{
|
|
"epoch": 1.7027310924369747,
|
|
"grad_norm": 11.814585500912411,
|
|
"learning_rate": 4.697638214465141e-06,
|
|
"loss": 0.9931437969207764,
|
|
"step": 3242
|
|
},
|
|
{
|
|
"epoch": 1.7032563025210083,
|
|
"grad_norm": 12.695009444158382,
|
|
"learning_rate": 4.694587840063868e-06,
|
|
"loss": 1.3028037548065186,
|
|
"step": 3243
|
|
},
|
|
{
|
|
"epoch": 1.7037815126050422,
|
|
"grad_norm": 8.928646860224024,
|
|
"learning_rate": 4.691537579755811e-06,
|
|
"loss": 1.7238823175430298,
|
|
"step": 3244
|
|
},
|
|
{
|
|
"epoch": 1.7043067226890756,
|
|
"grad_norm": 12.040154975154097,
|
|
"learning_rate": 4.688487434680459e-06,
|
|
"loss": 1.7754493951797485,
|
|
"step": 3245
|
|
},
|
|
{
|
|
"epoch": 1.7048319327731094,
|
|
"grad_norm": 7.535248898985819,
|
|
"learning_rate": 4.685437405977258e-06,
|
|
"loss": 1.4713134765625,
|
|
"step": 3246
|
|
},
|
|
{
|
|
"epoch": 1.7053571428571428,
|
|
"grad_norm": 12.984821814540634,
|
|
"learning_rate": 4.68238749478561e-06,
|
|
"loss": 1.2907443046569824,
|
|
"step": 3247
|
|
},
|
|
{
|
|
"epoch": 1.7058823529411766,
|
|
"grad_norm": 9.124845111639345,
|
|
"learning_rate": 4.679337702244877e-06,
|
|
"loss": 1.702890157699585,
|
|
"step": 3248
|
|
},
|
|
{
|
|
"epoch": 1.70640756302521,
|
|
"grad_norm": 9.278216348282228,
|
|
"learning_rate": 4.6762880294943734e-06,
|
|
"loss": 0.8058304786682129,
|
|
"step": 3249
|
|
},
|
|
{
|
|
"epoch": 1.7069327731092439,
|
|
"grad_norm": 12.30031432785795,
|
|
"learning_rate": 4.673238477673367e-06,
|
|
"loss": 1.395681619644165,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 1.7074579831932772,
|
|
"grad_norm": 12.83788411809448,
|
|
"learning_rate": 4.670189047921086e-06,
|
|
"loss": 1.439389705657959,
|
|
"step": 3251
|
|
},
|
|
{
|
|
"epoch": 1.707983193277311,
|
|
"grad_norm": 12.65720366210558,
|
|
"learning_rate": 4.667139741376708e-06,
|
|
"loss": 1.636742353439331,
|
|
"step": 3252
|
|
},
|
|
{
|
|
"epoch": 1.7085084033613445,
|
|
"grad_norm": 13.730427366000661,
|
|
"learning_rate": 4.664090559179367e-06,
|
|
"loss": 1.541890025138855,
|
|
"step": 3253
|
|
},
|
|
{
|
|
"epoch": 1.7090336134453783,
|
|
"grad_norm": 17.642703265607015,
|
|
"learning_rate": 4.661041502468149e-06,
|
|
"loss": 1.2382596731185913,
|
|
"step": 3254
|
|
},
|
|
{
|
|
"epoch": 1.7095588235294117,
|
|
"grad_norm": 12.39135845211606,
|
|
"learning_rate": 4.657992572382095e-06,
|
|
"loss": 1.3252674341201782,
|
|
"step": 3255
|
|
},
|
|
{
|
|
"epoch": 1.7100840336134455,
|
|
"grad_norm": 8.631156447445784,
|
|
"learning_rate": 4.654943770060197e-06,
|
|
"loss": 2.0390126705169678,
|
|
"step": 3256
|
|
},
|
|
{
|
|
"epoch": 1.710609243697479,
|
|
"grad_norm": 7.443829683098495,
|
|
"learning_rate": 4.651895096641402e-06,
|
|
"loss": 1.3086891174316406,
|
|
"step": 3257
|
|
},
|
|
{
|
|
"epoch": 1.7111344537815127,
|
|
"grad_norm": 14.10406049817156,
|
|
"learning_rate": 4.648846553264603e-06,
|
|
"loss": 1.1268144845962524,
|
|
"step": 3258
|
|
},
|
|
{
|
|
"epoch": 1.7116596638655461,
|
|
"grad_norm": 19.653444071824573,
|
|
"learning_rate": 4.6457981410686524e-06,
|
|
"loss": 1.4325177669525146,
|
|
"step": 3259
|
|
},
|
|
{
|
|
"epoch": 1.71218487394958,
|
|
"grad_norm": 13.769415760561396,
|
|
"learning_rate": 4.642749861192347e-06,
|
|
"loss": 1.728335976600647,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 1.7127100840336134,
|
|
"grad_norm": 9.118957495131754,
|
|
"learning_rate": 4.639701714774439e-06,
|
|
"loss": 2.0761523246765137,
|
|
"step": 3261
|
|
},
|
|
{
|
|
"epoch": 1.7132352941176472,
|
|
"grad_norm": 13.743227667640902,
|
|
"learning_rate": 4.6366537029536236e-06,
|
|
"loss": 1.4593192338943481,
|
|
"step": 3262
|
|
},
|
|
{
|
|
"epoch": 1.7137605042016806,
|
|
"grad_norm": 9.878282202025337,
|
|
"learning_rate": 4.633605826868556e-06,
|
|
"loss": 1.1992371082305908,
|
|
"step": 3263
|
|
},
|
|
{
|
|
"epoch": 1.7142857142857144,
|
|
"grad_norm": 21.10567191645549,
|
|
"learning_rate": 4.630558087657832e-06,
|
|
"loss": 2.5870370864868164,
|
|
"step": 3264
|
|
},
|
|
{
|
|
"epoch": 1.7148109243697478,
|
|
"grad_norm": 8.03974236204865,
|
|
"learning_rate": 4.627510486459999e-06,
|
|
"loss": 1.3676621913909912,
|
|
"step": 3265
|
|
},
|
|
{
|
|
"epoch": 1.7153361344537816,
|
|
"grad_norm": 14.39685510115169,
|
|
"learning_rate": 4.624463024413557e-06,
|
|
"loss": 1.3622328042984009,
|
|
"step": 3266
|
|
},
|
|
{
|
|
"epoch": 1.715861344537815,
|
|
"grad_norm": 10.89889824608526,
|
|
"learning_rate": 4.621415702656948e-06,
|
|
"loss": 1.913252592086792,
|
|
"step": 3267
|
|
},
|
|
{
|
|
"epoch": 1.7163865546218489,
|
|
"grad_norm": 10.04186725134007,
|
|
"learning_rate": 4.618368522328563e-06,
|
|
"loss": 1.382278561592102,
|
|
"step": 3268
|
|
},
|
|
{
|
|
"epoch": 1.7169117647058822,
|
|
"grad_norm": 10.013418711296895,
|
|
"learning_rate": 4.615321484566741e-06,
|
|
"loss": 1.0073938369750977,
|
|
"step": 3269
|
|
},
|
|
{
|
|
"epoch": 1.717436974789916,
|
|
"grad_norm": 7.891439724675239,
|
|
"learning_rate": 4.61227459050977e-06,
|
|
"loss": 1.5652869939804077,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 1.7179621848739495,
|
|
"grad_norm": 13.824348774478754,
|
|
"learning_rate": 4.609227841295882e-06,
|
|
"loss": 1.2578716278076172,
|
|
"step": 3271
|
|
},
|
|
{
|
|
"epoch": 1.7184873949579833,
|
|
"grad_norm": 13.651501915607122,
|
|
"learning_rate": 4.606181238063253e-06,
|
|
"loss": 1.4699373245239258,
|
|
"step": 3272
|
|
},
|
|
{
|
|
"epoch": 1.7190126050420167,
|
|
"grad_norm": 8.972590551870093,
|
|
"learning_rate": 4.603134781950007e-06,
|
|
"loss": 1.0772695541381836,
|
|
"step": 3273
|
|
},
|
|
{
|
|
"epoch": 1.7195378151260505,
|
|
"grad_norm": 17.37886197109619,
|
|
"learning_rate": 4.6000884740942135e-06,
|
|
"loss": 1.1347074508666992,
|
|
"step": 3274
|
|
},
|
|
{
|
|
"epoch": 1.720063025210084,
|
|
"grad_norm": 9.280507396035626,
|
|
"learning_rate": 4.597042315633885e-06,
|
|
"loss": 2.2019081115722656,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 1.7205882352941178,
|
|
"grad_norm": 10.394213281708955,
|
|
"learning_rate": 4.593996307706979e-06,
|
|
"loss": 1.4583905935287476,
|
|
"step": 3276
|
|
},
|
|
{
|
|
"epoch": 1.7211134453781511,
|
|
"grad_norm": 12.516320878145171,
|
|
"learning_rate": 4.590950451451397e-06,
|
|
"loss": 1.6682995557785034,
|
|
"step": 3277
|
|
},
|
|
{
|
|
"epoch": 1.721638655462185,
|
|
"grad_norm": 11.057605401044427,
|
|
"learning_rate": 4.587904748004984e-06,
|
|
"loss": 1.3575992584228516,
|
|
"step": 3278
|
|
},
|
|
{
|
|
"epoch": 1.7221638655462184,
|
|
"grad_norm": 19.151720887311562,
|
|
"learning_rate": 4.584859198505526e-06,
|
|
"loss": 1.5850321054458618,
|
|
"step": 3279
|
|
},
|
|
{
|
|
"epoch": 1.7226890756302522,
|
|
"grad_norm": 7.253040274635175,
|
|
"learning_rate": 4.581813804090752e-06,
|
|
"loss": 1.7625868320465088,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 1.7232142857142856,
|
|
"grad_norm": 8.94724572141308,
|
|
"learning_rate": 4.578768565898337e-06,
|
|
"loss": 1.3413604497909546,
|
|
"step": 3281
|
|
},
|
|
{
|
|
"epoch": 1.7237394957983194,
|
|
"grad_norm": 12.060253151336724,
|
|
"learning_rate": 4.575723485065893e-06,
|
|
"loss": 1.479799747467041,
|
|
"step": 3282
|
|
},
|
|
{
|
|
"epoch": 1.7242647058823528,
|
|
"grad_norm": 8.933308583101603,
|
|
"learning_rate": 4.5726785627309736e-06,
|
|
"loss": 1.6844385862350464,
|
|
"step": 3283
|
|
},
|
|
{
|
|
"epoch": 1.7247899159663866,
|
|
"grad_norm": 9.109789906182867,
|
|
"learning_rate": 4.569633800031075e-06,
|
|
"loss": 1.1544994115829468,
|
|
"step": 3284
|
|
},
|
|
{
|
|
"epoch": 1.72531512605042,
|
|
"grad_norm": 10.890598985413234,
|
|
"learning_rate": 4.566589198103635e-06,
|
|
"loss": 1.9263209104537964,
|
|
"step": 3285
|
|
},
|
|
{
|
|
"epoch": 1.7258403361344539,
|
|
"grad_norm": 10.304930311250292,
|
|
"learning_rate": 4.563544758086028e-06,
|
|
"loss": 1.5095046758651733,
|
|
"step": 3286
|
|
},
|
|
{
|
|
"epoch": 1.7263655462184873,
|
|
"grad_norm": 11.191141727298945,
|
|
"learning_rate": 4.560500481115568e-06,
|
|
"loss": 1.7304835319519043,
|
|
"step": 3287
|
|
},
|
|
{
|
|
"epoch": 1.726890756302521,
|
|
"grad_norm": 9.159675759410801,
|
|
"learning_rate": 4.557456368329511e-06,
|
|
"loss": 1.6753727197647095,
|
|
"step": 3288
|
|
},
|
|
{
|
|
"epoch": 1.7274159663865545,
|
|
"grad_norm": 9.301470579587201,
|
|
"learning_rate": 4.554412420865052e-06,
|
|
"loss": 1.813326358795166,
|
|
"step": 3289
|
|
},
|
|
{
|
|
"epoch": 1.7279411764705883,
|
|
"grad_norm": 9.427261962366169,
|
|
"learning_rate": 4.551368639859319e-06,
|
|
"loss": 2.2732186317443848,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 1.7284663865546217,
|
|
"grad_norm": 15.513657851664618,
|
|
"learning_rate": 4.5483250264493816e-06,
|
|
"loss": 1.6619755029678345,
|
|
"step": 3291
|
|
},
|
|
{
|
|
"epoch": 1.7289915966386555,
|
|
"grad_norm": 8.855328874944085,
|
|
"learning_rate": 4.545281581772249e-06,
|
|
"loss": 2.7943356037139893,
|
|
"step": 3292
|
|
},
|
|
{
|
|
"epoch": 1.729516806722689,
|
|
"grad_norm": 24.748749458259724,
|
|
"learning_rate": 4.542238306964863e-06,
|
|
"loss": 2.413684844970703,
|
|
"step": 3293
|
|
},
|
|
{
|
|
"epoch": 1.7300420168067228,
|
|
"grad_norm": 12.258830497307358,
|
|
"learning_rate": 4.539195203164104e-06,
|
|
"loss": 1.4870705604553223,
|
|
"step": 3294
|
|
},
|
|
{
|
|
"epoch": 1.7305672268907561,
|
|
"grad_norm": 13.412270345655795,
|
|
"learning_rate": 4.536152271506787e-06,
|
|
"loss": 2.348081111907959,
|
|
"step": 3295
|
|
},
|
|
{
|
|
"epoch": 1.73109243697479,
|
|
"grad_norm": 11.602057489602387,
|
|
"learning_rate": 4.533109513129666e-06,
|
|
"loss": 1.6497387886047363,
|
|
"step": 3296
|
|
},
|
|
{
|
|
"epoch": 1.7316176470588234,
|
|
"grad_norm": 8.0057633669754,
|
|
"learning_rate": 4.530066929169427e-06,
|
|
"loss": 2.1390504837036133,
|
|
"step": 3297
|
|
},
|
|
{
|
|
"epoch": 1.7321428571428572,
|
|
"grad_norm": 10.114554074478095,
|
|
"learning_rate": 4.527024520762693e-06,
|
|
"loss": 1.4101120233535767,
|
|
"step": 3298
|
|
},
|
|
{
|
|
"epoch": 1.7326680672268906,
|
|
"grad_norm": 9.537079628839239,
|
|
"learning_rate": 4.5239822890460174e-06,
|
|
"loss": 2.055835247039795,
|
|
"step": 3299
|
|
},
|
|
{
|
|
"epoch": 1.7331932773109244,
|
|
"grad_norm": 10.16957557101485,
|
|
"learning_rate": 4.520940235155895e-06,
|
|
"loss": 1.576056957244873,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 1.7337184873949578,
|
|
"grad_norm": 12.650944492447211,
|
|
"learning_rate": 4.5178983602287476e-06,
|
|
"loss": 0.8783017992973328,
|
|
"step": 3301
|
|
},
|
|
{
|
|
"epoch": 1.7342436974789917,
|
|
"grad_norm": 13.313690645956767,
|
|
"learning_rate": 4.514856665400931e-06,
|
|
"loss": 1.7578872442245483,
|
|
"step": 3302
|
|
},
|
|
{
|
|
"epoch": 1.7347689075630253,
|
|
"grad_norm": 7.932697646965227,
|
|
"learning_rate": 4.511815151808737e-06,
|
|
"loss": 1.4982397556304932,
|
|
"step": 3303
|
|
},
|
|
{
|
|
"epoch": 1.7352941176470589,
|
|
"grad_norm": 9.13772049030606,
|
|
"learning_rate": 4.508773820588388e-06,
|
|
"loss": 0.7393516898155212,
|
|
"step": 3304
|
|
},
|
|
{
|
|
"epoch": 1.7358193277310925,
|
|
"grad_norm": 13.73448788701065,
|
|
"learning_rate": 4.505732672876037e-06,
|
|
"loss": 1.6219474077224731,
|
|
"step": 3305
|
|
},
|
|
{
|
|
"epoch": 1.736344537815126,
|
|
"grad_norm": 10.12350304306231,
|
|
"learning_rate": 4.502691709807769e-06,
|
|
"loss": 0.7352396249771118,
|
|
"step": 3306
|
|
},
|
|
{
|
|
"epoch": 1.7368697478991597,
|
|
"grad_norm": 12.420606931770829,
|
|
"learning_rate": 4.499650932519602e-06,
|
|
"loss": 0.8984926342964172,
|
|
"step": 3307
|
|
},
|
|
{
|
|
"epoch": 1.7373949579831933,
|
|
"grad_norm": 8.47908913163684,
|
|
"learning_rate": 4.496610342147482e-06,
|
|
"loss": 1.7162363529205322,
|
|
"step": 3308
|
|
},
|
|
{
|
|
"epoch": 1.737920168067227,
|
|
"grad_norm": 7.619976734371435,
|
|
"learning_rate": 4.493569939827288e-06,
|
|
"loss": 1.6412806510925293,
|
|
"step": 3309
|
|
},
|
|
{
|
|
"epoch": 1.7384453781512605,
|
|
"grad_norm": 12.06422786644797,
|
|
"learning_rate": 4.490529726694823e-06,
|
|
"loss": 2.057121515274048,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 1.7389705882352942,
|
|
"grad_norm": 6.4841010505199215,
|
|
"learning_rate": 4.487489703885828e-06,
|
|
"loss": 1.6091282367706299,
|
|
"step": 3311
|
|
},
|
|
{
|
|
"epoch": 1.7394957983193278,
|
|
"grad_norm": 10.9727966659055,
|
|
"learning_rate": 4.4844498725359655e-06,
|
|
"loss": 1.5243709087371826,
|
|
"step": 3312
|
|
},
|
|
{
|
|
"epoch": 1.7400210084033614,
|
|
"grad_norm": 14.925627239097777,
|
|
"learning_rate": 4.48141023378083e-06,
|
|
"loss": 1.7952152490615845,
|
|
"step": 3313
|
|
},
|
|
{
|
|
"epoch": 1.740546218487395,
|
|
"grad_norm": 9.18924821922222,
|
|
"learning_rate": 4.478370788755943e-06,
|
|
"loss": 1.1616581678390503,
|
|
"step": 3314
|
|
},
|
|
{
|
|
"epoch": 1.7410714285714286,
|
|
"grad_norm": 27.642984586784024,
|
|
"learning_rate": 4.475331538596755e-06,
|
|
"loss": 1.4816443920135498,
|
|
"step": 3315
|
|
},
|
|
{
|
|
"epoch": 1.7415966386554622,
|
|
"grad_norm": 21.937547201530496,
|
|
"learning_rate": 4.472292484438642e-06,
|
|
"loss": 1.9132540225982666,
|
|
"step": 3316
|
|
},
|
|
{
|
|
"epoch": 1.7421218487394958,
|
|
"grad_norm": 7.625323026752423,
|
|
"learning_rate": 4.4692536274169055e-06,
|
|
"loss": 1.10856032371521,
|
|
"step": 3317
|
|
},
|
|
{
|
|
"epoch": 1.7426470588235294,
|
|
"grad_norm": 10.424587074678794,
|
|
"learning_rate": 4.466214968666777e-06,
|
|
"loss": 1.2741047143936157,
|
|
"step": 3318
|
|
},
|
|
{
|
|
"epoch": 1.743172268907563,
|
|
"grad_norm": 7.191319094455383,
|
|
"learning_rate": 4.463176509323413e-06,
|
|
"loss": 1.3879424333572388,
|
|
"step": 3319
|
|
},
|
|
{
|
|
"epoch": 1.7436974789915967,
|
|
"grad_norm": 9.07896005083348,
|
|
"learning_rate": 4.460138250521895e-06,
|
|
"loss": 1.0268973112106323,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 1.7442226890756303,
|
|
"grad_norm": 9.850944379791787,
|
|
"learning_rate": 4.457100193397226e-06,
|
|
"loss": 1.3899263143539429,
|
|
"step": 3321
|
|
},
|
|
{
|
|
"epoch": 1.7447478991596639,
|
|
"grad_norm": 13.18865769279598,
|
|
"learning_rate": 4.454062339084339e-06,
|
|
"loss": 1.6372075080871582,
|
|
"step": 3322
|
|
},
|
|
{
|
|
"epoch": 1.7452731092436975,
|
|
"grad_norm": 8.569394293651449,
|
|
"learning_rate": 4.45102468871809e-06,
|
|
"loss": 1.781775712966919,
|
|
"step": 3323
|
|
},
|
|
{
|
|
"epoch": 1.745798319327731,
|
|
"grad_norm": 11.961694616258718,
|
|
"learning_rate": 4.447987243433256e-06,
|
|
"loss": 1.275670051574707,
|
|
"step": 3324
|
|
},
|
|
{
|
|
"epoch": 1.7463235294117647,
|
|
"grad_norm": 8.835867447955053,
|
|
"learning_rate": 4.444950004364542e-06,
|
|
"loss": 1.282447338104248,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 1.7468487394957983,
|
|
"grad_norm": 9.805430443800143,
|
|
"learning_rate": 4.4419129726465706e-06,
|
|
"loss": 1.631540298461914,
|
|
"step": 3326
|
|
},
|
|
{
|
|
"epoch": 1.747373949579832,
|
|
"grad_norm": 18.016814087823914,
|
|
"learning_rate": 4.438876149413891e-06,
|
|
"loss": 1.465498685836792,
|
|
"step": 3327
|
|
},
|
|
{
|
|
"epoch": 1.7478991596638656,
|
|
"grad_norm": 7.270863933286584,
|
|
"learning_rate": 4.435839535800973e-06,
|
|
"loss": 2.469688892364502,
|
|
"step": 3328
|
|
},
|
|
{
|
|
"epoch": 1.7484243697478992,
|
|
"grad_norm": 13.441130430294614,
|
|
"learning_rate": 4.432803132942208e-06,
|
|
"loss": 1.0278105735778809,
|
|
"step": 3329
|
|
},
|
|
{
|
|
"epoch": 1.7489495798319328,
|
|
"grad_norm": 12.152997262367775,
|
|
"learning_rate": 4.42976694197191e-06,
|
|
"loss": 2.481796979904175,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 1.7494747899159664,
|
|
"grad_norm": 11.785155259849487,
|
|
"learning_rate": 4.426730964024312e-06,
|
|
"loss": 1.6311910152435303,
|
|
"step": 3331
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"grad_norm": 12.053458159721158,
|
|
"learning_rate": 4.423695200233567e-06,
|
|
"loss": 1.555237889289856,
|
|
"step": 3332
|
|
},
|
|
{
|
|
"epoch": 1.7505252100840336,
|
|
"grad_norm": 15.267338325021921,
|
|
"learning_rate": 4.420659651733751e-06,
|
|
"loss": 1.6818935871124268,
|
|
"step": 3333
|
|
},
|
|
{
|
|
"epoch": 1.7510504201680672,
|
|
"grad_norm": 12.925611257584087,
|
|
"learning_rate": 4.417624319658857e-06,
|
|
"loss": 1.6823792457580566,
|
|
"step": 3334
|
|
},
|
|
{
|
|
"epoch": 1.7515756302521008,
|
|
"grad_norm": 10.39583822503065,
|
|
"learning_rate": 4.4145892051427985e-06,
|
|
"loss": 1.2439255714416504,
|
|
"step": 3335
|
|
},
|
|
{
|
|
"epoch": 1.7521008403361344,
|
|
"grad_norm": 12.40423078330543,
|
|
"learning_rate": 4.4115543093194055e-06,
|
|
"loss": 1.1736741065979004,
|
|
"step": 3336
|
|
},
|
|
{
|
|
"epoch": 1.752626050420168,
|
|
"grad_norm": 10.300241726801236,
|
|
"learning_rate": 4.40851963332243e-06,
|
|
"loss": 1.4935336112976074,
|
|
"step": 3337
|
|
},
|
|
{
|
|
"epoch": 1.7531512605042017,
|
|
"grad_norm": 8.653039954025626,
|
|
"learning_rate": 4.40548517828554e-06,
|
|
"loss": 1.4860565662384033,
|
|
"step": 3338
|
|
},
|
|
{
|
|
"epoch": 1.7536764705882353,
|
|
"grad_norm": 12.089012376892505,
|
|
"learning_rate": 4.402450945342317e-06,
|
|
"loss": 1.3899967670440674,
|
|
"step": 3339
|
|
},
|
|
{
|
|
"epoch": 1.754201680672269,
|
|
"grad_norm": 15.542259417644336,
|
|
"learning_rate": 4.399416935626268e-06,
|
|
"loss": 1.4277682304382324,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 1.7547268907563025,
|
|
"grad_norm": 11.847544756393665,
|
|
"learning_rate": 4.396383150270811e-06,
|
|
"loss": 1.3811930418014526,
|
|
"step": 3341
|
|
},
|
|
{
|
|
"epoch": 1.7552521008403361,
|
|
"grad_norm": 12.396390017275069,
|
|
"learning_rate": 4.393349590409279e-06,
|
|
"loss": 1.3842015266418457,
|
|
"step": 3342
|
|
},
|
|
{
|
|
"epoch": 1.7557773109243697,
|
|
"grad_norm": 9.907882075907828,
|
|
"learning_rate": 4.3903162571749234e-06,
|
|
"loss": 1.997004747390747,
|
|
"step": 3343
|
|
},
|
|
{
|
|
"epoch": 1.7563025210084033,
|
|
"grad_norm": 10.20639038058395,
|
|
"learning_rate": 4.387283151700912e-06,
|
|
"loss": 1.267422080039978,
|
|
"step": 3344
|
|
},
|
|
{
|
|
"epoch": 1.756827731092437,
|
|
"grad_norm": 21.384778136434107,
|
|
"learning_rate": 4.384250275120325e-06,
|
|
"loss": 1.5731201171875,
|
|
"step": 3345
|
|
},
|
|
{
|
|
"epoch": 1.7573529411764706,
|
|
"grad_norm": 8.928437297808067,
|
|
"learning_rate": 4.381217628566158e-06,
|
|
"loss": 1.5649099349975586,
|
|
"step": 3346
|
|
},
|
|
{
|
|
"epoch": 1.7578781512605042,
|
|
"grad_norm": 14.0399760249689,
|
|
"learning_rate": 4.3781852131713195e-06,
|
|
"loss": 1.6794086694717407,
|
|
"step": 3347
|
|
},
|
|
{
|
|
"epoch": 1.7584033613445378,
|
|
"grad_norm": 9.829103731915518,
|
|
"learning_rate": 4.375153030068635e-06,
|
|
"loss": 1.2046095132827759,
|
|
"step": 3348
|
|
},
|
|
{
|
|
"epoch": 1.7589285714285714,
|
|
"grad_norm": 7.8112019758600395,
|
|
"learning_rate": 4.372121080390841e-06,
|
|
"loss": 1.5325660705566406,
|
|
"step": 3349
|
|
},
|
|
{
|
|
"epoch": 1.759453781512605,
|
|
"grad_norm": 7.860698784140351,
|
|
"learning_rate": 4.3690893652705835e-06,
|
|
"loss": 1.4487473964691162,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 1.7599789915966386,
|
|
"grad_norm": 12.49874827300103,
|
|
"learning_rate": 4.366057885840429e-06,
|
|
"loss": 1.4863296747207642,
|
|
"step": 3351
|
|
},
|
|
{
|
|
"epoch": 1.7605042016806722,
|
|
"grad_norm": 10.411555532184561,
|
|
"learning_rate": 4.363026643232847e-06,
|
|
"loss": 1.3299884796142578,
|
|
"step": 3352
|
|
},
|
|
{
|
|
"epoch": 1.7610294117647058,
|
|
"grad_norm": 9.623678550194937,
|
|
"learning_rate": 4.359995638580226e-06,
|
|
"loss": 1.5486626625061035,
|
|
"step": 3353
|
|
},
|
|
{
|
|
"epoch": 1.7615546218487395,
|
|
"grad_norm": 11.083364968840693,
|
|
"learning_rate": 4.356964873014859e-06,
|
|
"loss": 1.5557365417480469,
|
|
"step": 3354
|
|
},
|
|
{
|
|
"epoch": 1.762079831932773,
|
|
"grad_norm": 15.604329450730432,
|
|
"learning_rate": 4.353934347668957e-06,
|
|
"loss": 1.2399992942810059,
|
|
"step": 3355
|
|
},
|
|
{
|
|
"epoch": 1.7626050420168067,
|
|
"grad_norm": 10.485855074872733,
|
|
"learning_rate": 4.350904063674635e-06,
|
|
"loss": 1.7098774909973145,
|
|
"step": 3356
|
|
},
|
|
{
|
|
"epoch": 1.7631302521008403,
|
|
"grad_norm": 9.49596531837048,
|
|
"learning_rate": 4.34787402216392e-06,
|
|
"loss": 1.345895767211914,
|
|
"step": 3357
|
|
},
|
|
{
|
|
"epoch": 1.763655462184874,
|
|
"grad_norm": 10.495052412531466,
|
|
"learning_rate": 4.344844224268747e-06,
|
|
"loss": 1.2275123596191406,
|
|
"step": 3358
|
|
},
|
|
{
|
|
"epoch": 1.7641806722689075,
|
|
"grad_norm": 12.124976384551145,
|
|
"learning_rate": 4.341814671120965e-06,
|
|
"loss": 2.058727264404297,
|
|
"step": 3359
|
|
},
|
|
{
|
|
"epoch": 1.7647058823529411,
|
|
"grad_norm": 11.515539949504657,
|
|
"learning_rate": 4.338785363852327e-06,
|
|
"loss": 1.6749471426010132,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 1.7652310924369747,
|
|
"grad_norm": 13.502176996716772,
|
|
"learning_rate": 4.335756303594493e-06,
|
|
"loss": 1.5013811588287354,
|
|
"step": 3361
|
|
},
|
|
{
|
|
"epoch": 1.7657563025210083,
|
|
"grad_norm": 20.891526609455187,
|
|
"learning_rate": 4.332727491479035e-06,
|
|
"loss": 1.570244312286377,
|
|
"step": 3362
|
|
},
|
|
{
|
|
"epoch": 1.7662815126050422,
|
|
"grad_norm": 17.691419361444233,
|
|
"learning_rate": 4.32969892863743e-06,
|
|
"loss": 1.6747288703918457,
|
|
"step": 3363
|
|
},
|
|
{
|
|
"epoch": 1.7668067226890756,
|
|
"grad_norm": 7.550067948267033,
|
|
"learning_rate": 4.326670616201063e-06,
|
|
"loss": 1.5018736124038696,
|
|
"step": 3364
|
|
},
|
|
{
|
|
"epoch": 1.7673319327731094,
|
|
"grad_norm": 22.559536920091926,
|
|
"learning_rate": 4.323642555301222e-06,
|
|
"loss": 2.438303232192993,
|
|
"step": 3365
|
|
},
|
|
{
|
|
"epoch": 1.7678571428571428,
|
|
"grad_norm": 9.962722549165758,
|
|
"learning_rate": 4.320614747069106e-06,
|
|
"loss": 1.6288079023361206,
|
|
"step": 3366
|
|
},
|
|
{
|
|
"epoch": 1.7683823529411766,
|
|
"grad_norm": 27.48397340970555,
|
|
"learning_rate": 4.317587192635816e-06,
|
|
"loss": 1.795086145401001,
|
|
"step": 3367
|
|
},
|
|
{
|
|
"epoch": 1.76890756302521,
|
|
"grad_norm": 7.209285926501159,
|
|
"learning_rate": 4.31455989313236e-06,
|
|
"loss": 1.5091423988342285,
|
|
"step": 3368
|
|
},
|
|
{
|
|
"epoch": 1.7694327731092439,
|
|
"grad_norm": 13.77553263087726,
|
|
"learning_rate": 4.311532849689649e-06,
|
|
"loss": 1.4646027088165283,
|
|
"step": 3369
|
|
},
|
|
{
|
|
"epoch": 1.7699579831932772,
|
|
"grad_norm": 17.485984667911005,
|
|
"learning_rate": 4.308506063438502e-06,
|
|
"loss": 1.3757256269454956,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 1.770483193277311,
|
|
"grad_norm": 10.05897873277297,
|
|
"learning_rate": 4.305479535509637e-06,
|
|
"loss": 1.4939477443695068,
|
|
"step": 3371
|
|
},
|
|
{
|
|
"epoch": 1.7710084033613445,
|
|
"grad_norm": 11.529853793582946,
|
|
"learning_rate": 4.30245326703368e-06,
|
|
"loss": 1.780709147453308,
|
|
"step": 3372
|
|
},
|
|
{
|
|
"epoch": 1.7715336134453783,
|
|
"grad_norm": 9.210731002187607,
|
|
"learning_rate": 4.299427259141155e-06,
|
|
"loss": 1.1474990844726562,
|
|
"step": 3373
|
|
},
|
|
{
|
|
"epoch": 1.7720588235294117,
|
|
"grad_norm": 14.251205872521062,
|
|
"learning_rate": 4.296401512962496e-06,
|
|
"loss": 1.3692691326141357,
|
|
"step": 3374
|
|
},
|
|
{
|
|
"epoch": 1.7725840336134455,
|
|
"grad_norm": 19.659885966250275,
|
|
"learning_rate": 4.293376029628031e-06,
|
|
"loss": 1.4936809539794922,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 1.773109243697479,
|
|
"grad_norm": 13.936876011199358,
|
|
"learning_rate": 4.290350810267995e-06,
|
|
"loss": 1.599841594696045,
|
|
"step": 3376
|
|
},
|
|
{
|
|
"epoch": 1.7736344537815127,
|
|
"grad_norm": 18.38570066850407,
|
|
"learning_rate": 4.2873258560125244e-06,
|
|
"loss": 1.963813066482544,
|
|
"step": 3377
|
|
},
|
|
{
|
|
"epoch": 1.7741596638655461,
|
|
"grad_norm": 12.675988020173412,
|
|
"learning_rate": 4.284301167991654e-06,
|
|
"loss": 1.684263825416565,
|
|
"step": 3378
|
|
},
|
|
{
|
|
"epoch": 1.77468487394958,
|
|
"grad_norm": 10.321127312765812,
|
|
"learning_rate": 4.2812767473353205e-06,
|
|
"loss": 1.0244954824447632,
|
|
"step": 3379
|
|
},
|
|
{
|
|
"epoch": 1.7752100840336134,
|
|
"grad_norm": 13.745296316011132,
|
|
"learning_rate": 4.27825259517336e-06,
|
|
"loss": 1.9200084209442139,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 1.7757352941176472,
|
|
"grad_norm": 9.358755329470656,
|
|
"learning_rate": 4.275228712635511e-06,
|
|
"loss": 1.404308795928955,
|
|
"step": 3381
|
|
},
|
|
{
|
|
"epoch": 1.7762605042016806,
|
|
"grad_norm": 9.831000105275193,
|
|
"learning_rate": 4.272205100851407e-06,
|
|
"loss": 1.6431167125701904,
|
|
"step": 3382
|
|
},
|
|
{
|
|
"epoch": 1.7767857142857144,
|
|
"grad_norm": 11.556416613911555,
|
|
"learning_rate": 4.269181760950584e-06,
|
|
"loss": 1.0737049579620361,
|
|
"step": 3383
|
|
},
|
|
{
|
|
"epoch": 1.7773109243697478,
|
|
"grad_norm": 10.021238929483955,
|
|
"learning_rate": 4.266158694062472e-06,
|
|
"loss": 1.6588964462280273,
|
|
"step": 3384
|
|
},
|
|
{
|
|
"epoch": 1.7778361344537816,
|
|
"grad_norm": 15.66006574277899,
|
|
"learning_rate": 4.263135901316406e-06,
|
|
"loss": 1.2914001941680908,
|
|
"step": 3385
|
|
},
|
|
{
|
|
"epoch": 1.778361344537815,
|
|
"grad_norm": 11.129240868288132,
|
|
"learning_rate": 4.2601133838416145e-06,
|
|
"loss": 1.1047592163085938,
|
|
"step": 3386
|
|
},
|
|
{
|
|
"epoch": 1.7788865546218489,
|
|
"grad_norm": 11.265281757073822,
|
|
"learning_rate": 4.257091142767221e-06,
|
|
"loss": 1.5732263326644897,
|
|
"step": 3387
|
|
},
|
|
{
|
|
"epoch": 1.7794117647058822,
|
|
"grad_norm": 7.82767456156718,
|
|
"learning_rate": 4.2540691792222485e-06,
|
|
"loss": 1.2557446956634521,
|
|
"step": 3388
|
|
},
|
|
{
|
|
"epoch": 1.779936974789916,
|
|
"grad_norm": 9.590464284366783,
|
|
"learning_rate": 4.251047494335616e-06,
|
|
"loss": 1.7582063674926758,
|
|
"step": 3389
|
|
},
|
|
{
|
|
"epoch": 1.7804621848739495,
|
|
"grad_norm": 14.349341692367407,
|
|
"learning_rate": 4.248026089236138e-06,
|
|
"loss": 1.4493112564086914,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 1.7809873949579833,
|
|
"grad_norm": 9.463167679822382,
|
|
"learning_rate": 4.245004965052526e-06,
|
|
"loss": 1.999086618423462,
|
|
"step": 3391
|
|
},
|
|
{
|
|
"epoch": 1.7815126050420167,
|
|
"grad_norm": 9.883404140634354,
|
|
"learning_rate": 4.2419841229133844e-06,
|
|
"loss": 1.8912391662597656,
|
|
"step": 3392
|
|
},
|
|
{
|
|
"epoch": 1.7820378151260505,
|
|
"grad_norm": 10.167465917169888,
|
|
"learning_rate": 4.238963563947212e-06,
|
|
"loss": 1.529426097869873,
|
|
"step": 3393
|
|
},
|
|
{
|
|
"epoch": 1.782563025210084,
|
|
"grad_norm": 10.573807092259026,
|
|
"learning_rate": 4.235943289282405e-06,
|
|
"loss": 3.1425058841705322,
|
|
"step": 3394
|
|
},
|
|
{
|
|
"epoch": 1.7830882352941178,
|
|
"grad_norm": 7.899886108099249,
|
|
"learning_rate": 4.23292330004725e-06,
|
|
"loss": 1.1963062286376953,
|
|
"step": 3395
|
|
},
|
|
{
|
|
"epoch": 1.7836134453781511,
|
|
"grad_norm": 9.0409811724384,
|
|
"learning_rate": 4.229903597369927e-06,
|
|
"loss": 0.9920728206634521,
|
|
"step": 3396
|
|
},
|
|
{
|
|
"epoch": 1.784138655462185,
|
|
"grad_norm": 18.720442044695666,
|
|
"learning_rate": 4.226884182378513e-06,
|
|
"loss": 1.8170883655548096,
|
|
"step": 3397
|
|
},
|
|
{
|
|
"epoch": 1.7846638655462184,
|
|
"grad_norm": 14.796618315149875,
|
|
"learning_rate": 4.2238650562009744e-06,
|
|
"loss": 1.424818515777588,
|
|
"step": 3398
|
|
},
|
|
{
|
|
"epoch": 1.7851890756302522,
|
|
"grad_norm": 8.456353077155875,
|
|
"learning_rate": 4.220846219965168e-06,
|
|
"loss": 1.2337695360183716,
|
|
"step": 3399
|
|
},
|
|
{
|
|
"epoch": 1.7857142857142856,
|
|
"grad_norm": 22.928900834329983,
|
|
"learning_rate": 4.217827674798845e-06,
|
|
"loss": 1.0628197193145752,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 1.7862394957983194,
|
|
"grad_norm": 13.308907523581551,
|
|
"learning_rate": 4.2148094218296485e-06,
|
|
"loss": 1.5001177787780762,
|
|
"step": 3401
|
|
},
|
|
{
|
|
"epoch": 1.7867647058823528,
|
|
"grad_norm": 12.124316450927475,
|
|
"learning_rate": 4.211791462185111e-06,
|
|
"loss": 1.252639889717102,
|
|
"step": 3402
|
|
},
|
|
{
|
|
"epoch": 1.7872899159663866,
|
|
"grad_norm": 8.049429858413667,
|
|
"learning_rate": 4.2087737969926545e-06,
|
|
"loss": 1.6508002281188965,
|
|
"step": 3403
|
|
},
|
|
{
|
|
"epoch": 1.78781512605042,
|
|
"grad_norm": 13.534543634030266,
|
|
"learning_rate": 4.205756427379592e-06,
|
|
"loss": 1.526294231414795,
|
|
"step": 3404
|
|
},
|
|
{
|
|
"epoch": 1.7883403361344539,
|
|
"grad_norm": 7.514592695953646,
|
|
"learning_rate": 4.202739354473127e-06,
|
|
"loss": 1.4986779689788818,
|
|
"step": 3405
|
|
},
|
|
{
|
|
"epoch": 1.7888655462184873,
|
|
"grad_norm": 22.581629755018152,
|
|
"learning_rate": 4.1997225794003515e-06,
|
|
"loss": 1.3045344352722168,
|
|
"step": 3406
|
|
},
|
|
{
|
|
"epoch": 1.789390756302521,
|
|
"grad_norm": 16.174143043144188,
|
|
"learning_rate": 4.196706103288244e-06,
|
|
"loss": 1.1019316911697388,
|
|
"step": 3407
|
|
},
|
|
{
|
|
"epoch": 1.7899159663865545,
|
|
"grad_norm": 10.848517249633964,
|
|
"learning_rate": 4.193689927263677e-06,
|
|
"loss": 1.3397021293640137,
|
|
"step": 3408
|
|
},
|
|
{
|
|
"epoch": 1.7904411764705883,
|
|
"grad_norm": 9.829259824797061,
|
|
"learning_rate": 4.190674052453405e-06,
|
|
"loss": 1.1827586889266968,
|
|
"step": 3409
|
|
},
|
|
{
|
|
"epoch": 1.7909663865546217,
|
|
"grad_norm": 11.140713687457362,
|
|
"learning_rate": 4.187658479984072e-06,
|
|
"loss": 1.4190447330474854,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 1.7914915966386555,
|
|
"grad_norm": 17.73096309388992,
|
|
"learning_rate": 4.184643210982209e-06,
|
|
"loss": 2.6395411491394043,
|
|
"step": 3411
|
|
},
|
|
{
|
|
"epoch": 1.792016806722689,
|
|
"grad_norm": 17.588221957204123,
|
|
"learning_rate": 4.181628246574236e-06,
|
|
"loss": 1.595414638519287,
|
|
"step": 3412
|
|
},
|
|
{
|
|
"epoch": 1.7925420168067228,
|
|
"grad_norm": 16.78050267554127,
|
|
"learning_rate": 4.178613587886455e-06,
|
|
"loss": 1.1024020910263062,
|
|
"step": 3413
|
|
},
|
|
{
|
|
"epoch": 1.7930672268907561,
|
|
"grad_norm": 13.800256123851637,
|
|
"learning_rate": 4.175599236045058e-06,
|
|
"loss": 1.1093014478683472,
|
|
"step": 3414
|
|
},
|
|
{
|
|
"epoch": 1.79359243697479,
|
|
"grad_norm": 16.134478004079746,
|
|
"learning_rate": 4.172585192176117e-06,
|
|
"loss": 2.031782865524292,
|
|
"step": 3415
|
|
},
|
|
{
|
|
"epoch": 1.7941176470588234,
|
|
"grad_norm": 12.224926683605693,
|
|
"learning_rate": 4.169571457405597e-06,
|
|
"loss": 1.4426114559173584,
|
|
"step": 3416
|
|
},
|
|
{
|
|
"epoch": 1.7946428571428572,
|
|
"grad_norm": 10.468125001528573,
|
|
"learning_rate": 4.166558032859339e-06,
|
|
"loss": 1.1929881572723389,
|
|
"step": 3417
|
|
},
|
|
{
|
|
"epoch": 1.7951680672268906,
|
|
"grad_norm": 13.003193560226709,
|
|
"learning_rate": 4.163544919663073e-06,
|
|
"loss": 1.6875252723693848,
|
|
"step": 3418
|
|
},
|
|
{
|
|
"epoch": 1.7956932773109244,
|
|
"grad_norm": 8.945094165404068,
|
|
"learning_rate": 4.160532118942411e-06,
|
|
"loss": 1.7581286430358887,
|
|
"step": 3419
|
|
},
|
|
{
|
|
"epoch": 1.7962184873949578,
|
|
"grad_norm": 9.69869538124073,
|
|
"learning_rate": 4.15751963182285e-06,
|
|
"loss": 1.492494821548462,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 1.7967436974789917,
|
|
"grad_norm": 12.917286761789113,
|
|
"learning_rate": 4.154507459429769e-06,
|
|
"loss": 1.3444740772247314,
|
|
"step": 3421
|
|
},
|
|
{
|
|
"epoch": 1.7972689075630253,
|
|
"grad_norm": 10.12759682759791,
|
|
"learning_rate": 4.1514956028884265e-06,
|
|
"loss": 2.462442636489868,
|
|
"step": 3422
|
|
},
|
|
{
|
|
"epoch": 1.7977941176470589,
|
|
"grad_norm": 9.11132367715039,
|
|
"learning_rate": 4.148484063323969e-06,
|
|
"loss": 1.1968019008636475,
|
|
"step": 3423
|
|
},
|
|
{
|
|
"epoch": 1.7983193277310925,
|
|
"grad_norm": 9.751469363461084,
|
|
"learning_rate": 4.14547284186142e-06,
|
|
"loss": 1.1196238994598389,
|
|
"step": 3424
|
|
},
|
|
{
|
|
"epoch": 1.798844537815126,
|
|
"grad_norm": 14.281769005761861,
|
|
"learning_rate": 4.142461939625685e-06,
|
|
"loss": 1.6557984352111816,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 1.7993697478991597,
|
|
"grad_norm": 10.895223535255173,
|
|
"learning_rate": 4.13945135774155e-06,
|
|
"loss": 1.4526193141937256,
|
|
"step": 3426
|
|
},
|
|
{
|
|
"epoch": 1.7998949579831933,
|
|
"grad_norm": 15.280765866171025,
|
|
"learning_rate": 4.136441097333683e-06,
|
|
"loss": 2.054003953933716,
|
|
"step": 3427
|
|
},
|
|
{
|
|
"epoch": 1.800420168067227,
|
|
"grad_norm": 9.246251364954835,
|
|
"learning_rate": 4.133431159526631e-06,
|
|
"loss": 2.2219438552856445,
|
|
"step": 3428
|
|
},
|
|
{
|
|
"epoch": 1.8009453781512605,
|
|
"grad_norm": 13.129971947418955,
|
|
"learning_rate": 4.13042154544482e-06,
|
|
"loss": 1.625846266746521,
|
|
"step": 3429
|
|
},
|
|
{
|
|
"epoch": 1.8014705882352942,
|
|
"grad_norm": 12.978736800632362,
|
|
"learning_rate": 4.127412256212554e-06,
|
|
"loss": 2.6511335372924805,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 1.8019957983193278,
|
|
"grad_norm": 9.79788739331427,
|
|
"learning_rate": 4.12440329295402e-06,
|
|
"loss": 1.8296453952789307,
|
|
"step": 3431
|
|
},
|
|
{
|
|
"epoch": 1.8025210084033614,
|
|
"grad_norm": 9.167691812725113,
|
|
"learning_rate": 4.121394656793279e-06,
|
|
"loss": 1.5473147630691528,
|
|
"step": 3432
|
|
},
|
|
{
|
|
"epoch": 1.803046218487395,
|
|
"grad_norm": 16.886183234527234,
|
|
"learning_rate": 4.1183863488542686e-06,
|
|
"loss": 1.3791701793670654,
|
|
"step": 3433
|
|
},
|
|
{
|
|
"epoch": 1.8035714285714286,
|
|
"grad_norm": 11.204075303756833,
|
|
"learning_rate": 4.1153783702608105e-06,
|
|
"loss": 2.1408419609069824,
|
|
"step": 3434
|
|
},
|
|
{
|
|
"epoch": 1.8040966386554622,
|
|
"grad_norm": 10.731028395399425,
|
|
"learning_rate": 4.112370722136597e-06,
|
|
"loss": 1.5430549383163452,
|
|
"step": 3435
|
|
},
|
|
{
|
|
"epoch": 1.8046218487394958,
|
|
"grad_norm": 9.869315215658188,
|
|
"learning_rate": 4.109363405605198e-06,
|
|
"loss": 1.401909351348877,
|
|
"step": 3436
|
|
},
|
|
{
|
|
"epoch": 1.8051470588235294,
|
|
"grad_norm": 9.031705262630664,
|
|
"learning_rate": 4.106356421790062e-06,
|
|
"loss": 1.6186398267745972,
|
|
"step": 3437
|
|
},
|
|
{
|
|
"epoch": 1.805672268907563,
|
|
"grad_norm": 11.210346683322042,
|
|
"learning_rate": 4.103349771814512e-06,
|
|
"loss": 0.7343453168869019,
|
|
"step": 3438
|
|
},
|
|
{
|
|
"epoch": 1.8061974789915967,
|
|
"grad_norm": 9.795269402981832,
|
|
"learning_rate": 4.100343456801747e-06,
|
|
"loss": 1.707342505455017,
|
|
"step": 3439
|
|
},
|
|
{
|
|
"epoch": 1.8067226890756303,
|
|
"grad_norm": 9.607912720799668,
|
|
"learning_rate": 4.0973374778748385e-06,
|
|
"loss": 2.2637925148010254,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 1.8072478991596639,
|
|
"grad_norm": 9.30696000541257,
|
|
"learning_rate": 4.094331836156732e-06,
|
|
"loss": 1.817786455154419,
|
|
"step": 3441
|
|
},
|
|
{
|
|
"epoch": 1.8077731092436975,
|
|
"grad_norm": 9.120924809969319,
|
|
"learning_rate": 4.091326532770253e-06,
|
|
"loss": 1.3939745426177979,
|
|
"step": 3442
|
|
},
|
|
{
|
|
"epoch": 1.808298319327731,
|
|
"grad_norm": 12.791618153069194,
|
|
"learning_rate": 4.088321568838095e-06,
|
|
"loss": 1.0895910263061523,
|
|
"step": 3443
|
|
},
|
|
{
|
|
"epoch": 1.8088235294117647,
|
|
"grad_norm": 15.805091519866417,
|
|
"learning_rate": 4.0853169454828245e-06,
|
|
"loss": 1.1233513355255127,
|
|
"step": 3444
|
|
},
|
|
{
|
|
"epoch": 1.8093487394957983,
|
|
"grad_norm": 9.67114073383788,
|
|
"learning_rate": 4.082312663826886e-06,
|
|
"loss": 1.3852238655090332,
|
|
"step": 3445
|
|
},
|
|
{
|
|
"epoch": 1.809873949579832,
|
|
"grad_norm": 13.11357620584965,
|
|
"learning_rate": 4.07930872499259e-06,
|
|
"loss": 1.895456075668335,
|
|
"step": 3446
|
|
},
|
|
{
|
|
"epoch": 1.8103991596638656,
|
|
"grad_norm": 16.472186007534404,
|
|
"learning_rate": 4.0763051301021225e-06,
|
|
"loss": 1.219724416732788,
|
|
"step": 3447
|
|
},
|
|
{
|
|
"epoch": 1.8109243697478992,
|
|
"grad_norm": 13.682498737920504,
|
|
"learning_rate": 4.07330188027754e-06,
|
|
"loss": 1.4474141597747803,
|
|
"step": 3448
|
|
},
|
|
{
|
|
"epoch": 1.8114495798319328,
|
|
"grad_norm": 16.945705699886382,
|
|
"learning_rate": 4.070298976640772e-06,
|
|
"loss": 1.7105793952941895,
|
|
"step": 3449
|
|
},
|
|
{
|
|
"epoch": 1.8119747899159664,
|
|
"grad_norm": 13.491181501760696,
|
|
"learning_rate": 4.067296420313616e-06,
|
|
"loss": 1.7510032653808594,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 1.8125,
|
|
"grad_norm": 9.557034896649517,
|
|
"learning_rate": 4.0642942124177405e-06,
|
|
"loss": 1.811830997467041,
|
|
"step": 3451
|
|
},
|
|
{
|
|
"epoch": 1.8130252100840336,
|
|
"grad_norm": 6.855976914768561,
|
|
"learning_rate": 4.061292354074683e-06,
|
|
"loss": 1.4671962261199951,
|
|
"step": 3452
|
|
},
|
|
{
|
|
"epoch": 1.8135504201680672,
|
|
"grad_norm": 10.792420388671536,
|
|
"learning_rate": 4.058290846405856e-06,
|
|
"loss": 1.3182471990585327,
|
|
"step": 3453
|
|
},
|
|
{
|
|
"epoch": 1.8140756302521008,
|
|
"grad_norm": 23.892205682541743,
|
|
"learning_rate": 4.055289690532533e-06,
|
|
"loss": 1.6939082145690918,
|
|
"step": 3454
|
|
},
|
|
{
|
|
"epoch": 1.8146008403361344,
|
|
"grad_norm": 7.0967763367433685,
|
|
"learning_rate": 4.052288887575859e-06,
|
|
"loss": 1.5236637592315674,
|
|
"step": 3455
|
|
},
|
|
{
|
|
"epoch": 1.815126050420168,
|
|
"grad_norm": 10.597848400874861,
|
|
"learning_rate": 4.04928843865685e-06,
|
|
"loss": 1.867397427558899,
|
|
"step": 3456
|
|
},
|
|
{
|
|
"epoch": 1.8156512605042017,
|
|
"grad_norm": 11.652814350156293,
|
|
"learning_rate": 4.046288344896388e-06,
|
|
"loss": 1.8498456478118896,
|
|
"step": 3457
|
|
},
|
|
{
|
|
"epoch": 1.8161764705882353,
|
|
"grad_norm": 13.034029284683328,
|
|
"learning_rate": 4.043288607415219e-06,
|
|
"loss": 1.4994248151779175,
|
|
"step": 3458
|
|
},
|
|
{
|
|
"epoch": 1.816701680672269,
|
|
"grad_norm": 12.089453086650884,
|
|
"learning_rate": 4.040289227333961e-06,
|
|
"loss": 1.8718979358673096,
|
|
"step": 3459
|
|
},
|
|
{
|
|
"epoch": 1.8172268907563025,
|
|
"grad_norm": 14.745646890122712,
|
|
"learning_rate": 4.0372902057730965e-06,
|
|
"loss": 1.2499297857284546,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 1.8177521008403361,
|
|
"grad_norm": 8.929403476902454,
|
|
"learning_rate": 4.034291543852973e-06,
|
|
"loss": 1.5101208686828613,
|
|
"step": 3461
|
|
},
|
|
{
|
|
"epoch": 1.8182773109243697,
|
|
"grad_norm": 12.343841661330574,
|
|
"learning_rate": 4.031293242693804e-06,
|
|
"loss": 2.445464611053467,
|
|
"step": 3462
|
|
},
|
|
{
|
|
"epoch": 1.8188025210084033,
|
|
"grad_norm": 12.469468961118855,
|
|
"learning_rate": 4.02829530341567e-06,
|
|
"loss": 1.645592451095581,
|
|
"step": 3463
|
|
},
|
|
{
|
|
"epoch": 1.819327731092437,
|
|
"grad_norm": 9.840940280615914,
|
|
"learning_rate": 4.025297727138515e-06,
|
|
"loss": 0.6531387567520142,
|
|
"step": 3464
|
|
},
|
|
{
|
|
"epoch": 1.8198529411764706,
|
|
"grad_norm": 11.69707365477793,
|
|
"learning_rate": 4.022300514982146e-06,
|
|
"loss": 1.9656018018722534,
|
|
"step": 3465
|
|
},
|
|
{
|
|
"epoch": 1.8203781512605042,
|
|
"grad_norm": 14.598663611267417,
|
|
"learning_rate": 4.019303668066238e-06,
|
|
"loss": 1.7297413349151611,
|
|
"step": 3466
|
|
},
|
|
{
|
|
"epoch": 1.8209033613445378,
|
|
"grad_norm": 8.425037055214741,
|
|
"learning_rate": 4.016307187510323e-06,
|
|
"loss": 1.152799367904663,
|
|
"step": 3467
|
|
},
|
|
{
|
|
"epoch": 1.8214285714285714,
|
|
"grad_norm": 10.563128855927673,
|
|
"learning_rate": 4.013311074433804e-06,
|
|
"loss": 1.5894992351531982,
|
|
"step": 3468
|
|
},
|
|
{
|
|
"epoch": 1.821953781512605,
|
|
"grad_norm": 15.959628200021928,
|
|
"learning_rate": 4.010315329955941e-06,
|
|
"loss": 3.25376558303833,
|
|
"step": 3469
|
|
},
|
|
{
|
|
"epoch": 1.8224789915966386,
|
|
"grad_norm": 7.335622259363143,
|
|
"learning_rate": 4.007319955195857e-06,
|
|
"loss": 1.4387871026992798,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 1.8230042016806722,
|
|
"grad_norm": 17.588397483196,
|
|
"learning_rate": 4.004324951272542e-06,
|
|
"loss": 2.3955492973327637,
|
|
"step": 3471
|
|
},
|
|
{
|
|
"epoch": 1.8235294117647058,
|
|
"grad_norm": 12.966950041118393,
|
|
"learning_rate": 4.001330319304839e-06,
|
|
"loss": 1.4235239028930664,
|
|
"step": 3472
|
|
},
|
|
{
|
|
"epoch": 1.8240546218487395,
|
|
"grad_norm": 9.704952732966582,
|
|
"learning_rate": 3.998336060411459e-06,
|
|
"loss": 1.40226411819458,
|
|
"step": 3473
|
|
},
|
|
{
|
|
"epoch": 1.824579831932773,
|
|
"grad_norm": 10.06606318007555,
|
|
"learning_rate": 3.99534217571097e-06,
|
|
"loss": 1.7859091758728027,
|
|
"step": 3474
|
|
},
|
|
{
|
|
"epoch": 1.8251050420168067,
|
|
"grad_norm": 10.228912141924729,
|
|
"learning_rate": 3.992348666321803e-06,
|
|
"loss": 1.8350114822387695,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 1.8256302521008403,
|
|
"grad_norm": 11.99436743543487,
|
|
"learning_rate": 3.989355533362246e-06,
|
|
"loss": 1.480570912361145,
|
|
"step": 3476
|
|
},
|
|
{
|
|
"epoch": 1.826155462184874,
|
|
"grad_norm": 10.298924229721873,
|
|
"learning_rate": 3.986362777950448e-06,
|
|
"loss": 1.5259010791778564,
|
|
"step": 3477
|
|
},
|
|
{
|
|
"epoch": 1.8266806722689075,
|
|
"grad_norm": 12.035066712497297,
|
|
"learning_rate": 3.983370401204415e-06,
|
|
"loss": 1.3747330904006958,
|
|
"step": 3478
|
|
},
|
|
{
|
|
"epoch": 1.8272058823529411,
|
|
"grad_norm": 8.072435006498589,
|
|
"learning_rate": 3.980378404242014e-06,
|
|
"loss": 1.5108224153518677,
|
|
"step": 3479
|
|
},
|
|
{
|
|
"epoch": 1.8277310924369747,
|
|
"grad_norm": 20.28431216097508,
|
|
"learning_rate": 3.9773867881809705e-06,
|
|
"loss": 1.5896763801574707,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 1.8282563025210083,
|
|
"grad_norm": 8.495392722718748,
|
|
"learning_rate": 3.9743955541388645e-06,
|
|
"loss": 1.28151535987854,
|
|
"step": 3481
|
|
},
|
|
{
|
|
"epoch": 1.8287815126050422,
|
|
"grad_norm": 11.6212349030481,
|
|
"learning_rate": 3.971404703233137e-06,
|
|
"loss": 2.096107006072998,
|
|
"step": 3482
|
|
},
|
|
{
|
|
"epoch": 1.8293067226890756,
|
|
"grad_norm": 10.387461788306586,
|
|
"learning_rate": 3.968414236581083e-06,
|
|
"loss": 1.4754010438919067,
|
|
"step": 3483
|
|
},
|
|
{
|
|
"epoch": 1.8298319327731094,
|
|
"grad_norm": 12.209172961359403,
|
|
"learning_rate": 3.965424155299854e-06,
|
|
"loss": 1.67756986618042,
|
|
"step": 3484
|
|
},
|
|
{
|
|
"epoch": 1.8303571428571428,
|
|
"grad_norm": 6.670216689112044,
|
|
"learning_rate": 3.962434460506459e-06,
|
|
"loss": 1.754809021949768,
|
|
"step": 3485
|
|
},
|
|
{
|
|
"epoch": 1.8308823529411766,
|
|
"grad_norm": 17.98808673936292,
|
|
"learning_rate": 3.959445153317764e-06,
|
|
"loss": 2.6715197563171387,
|
|
"step": 3486
|
|
},
|
|
{
|
|
"epoch": 1.83140756302521,
|
|
"grad_norm": 13.205661357485651,
|
|
"learning_rate": 3.956456234850487e-06,
|
|
"loss": 1.5148085355758667,
|
|
"step": 3487
|
|
},
|
|
{
|
|
"epoch": 1.8319327731092439,
|
|
"grad_norm": 8.802016780785296,
|
|
"learning_rate": 3.953467706221202e-06,
|
|
"loss": 1.7863610982894897,
|
|
"step": 3488
|
|
},
|
|
{
|
|
"epoch": 1.8324579831932772,
|
|
"grad_norm": 10.038342880235064,
|
|
"learning_rate": 3.950479568546336e-06,
|
|
"loss": 0.9457213282585144,
|
|
"step": 3489
|
|
},
|
|
{
|
|
"epoch": 1.832983193277311,
|
|
"grad_norm": 10.635568910590035,
|
|
"learning_rate": 3.947491822942174e-06,
|
|
"loss": 1.2064887285232544,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 1.8335084033613445,
|
|
"grad_norm": 20.290554699412123,
|
|
"learning_rate": 3.9445044705248525e-06,
|
|
"loss": 1.7002025842666626,
|
|
"step": 3491
|
|
},
|
|
{
|
|
"epoch": 1.8340336134453783,
|
|
"grad_norm": 12.039964517380843,
|
|
"learning_rate": 3.941517512410357e-06,
|
|
"loss": 1.3340742588043213,
|
|
"step": 3492
|
|
},
|
|
{
|
|
"epoch": 1.8345588235294117,
|
|
"grad_norm": 8.364285713488247,
|
|
"learning_rate": 3.938530949714533e-06,
|
|
"loss": 1.5468111038208008,
|
|
"step": 3493
|
|
},
|
|
{
|
|
"epoch": 1.8350840336134455,
|
|
"grad_norm": 8.638257087094814,
|
|
"learning_rate": 3.935544783553072e-06,
|
|
"loss": 1.4965511560440063,
|
|
"step": 3494
|
|
},
|
|
{
|
|
"epoch": 1.835609243697479,
|
|
"grad_norm": 7.415632342102558,
|
|
"learning_rate": 3.932559015041523e-06,
|
|
"loss": 1.7324585914611816,
|
|
"step": 3495
|
|
},
|
|
{
|
|
"epoch": 1.8361344537815127,
|
|
"grad_norm": 10.51269812806555,
|
|
"learning_rate": 3.929573645295278e-06,
|
|
"loss": 1.1609418392181396,
|
|
"step": 3496
|
|
},
|
|
{
|
|
"epoch": 1.8366596638655461,
|
|
"grad_norm": 15.619137489213774,
|
|
"learning_rate": 3.926588675429591e-06,
|
|
"loss": 1.2084197998046875,
|
|
"step": 3497
|
|
},
|
|
{
|
|
"epoch": 1.83718487394958,
|
|
"grad_norm": 12.066436472992145,
|
|
"learning_rate": 3.9236041065595596e-06,
|
|
"loss": 1.248793363571167,
|
|
"step": 3498
|
|
},
|
|
{
|
|
"epoch": 1.8377100840336134,
|
|
"grad_norm": 9.580304526892963,
|
|
"learning_rate": 3.920619939800131e-06,
|
|
"loss": 1.2989832162857056,
|
|
"step": 3499
|
|
},
|
|
{
|
|
"epoch": 1.8382352941176472,
|
|
"grad_norm": 6.726292567384109,
|
|
"learning_rate": 3.917636176266105e-06,
|
|
"loss": 1.5274391174316406,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 1.8387605042016806,
|
|
"grad_norm": 6.147539842834988,
|
|
"learning_rate": 3.914652817072132e-06,
|
|
"loss": 1.577737808227539,
|
|
"step": 3501
|
|
},
|
|
{
|
|
"epoch": 1.8392857142857144,
|
|
"grad_norm": 11.201507595836313,
|
|
"learning_rate": 3.9116698633327076e-06,
|
|
"loss": 1.575178861618042,
|
|
"step": 3502
|
|
},
|
|
{
|
|
"epoch": 1.8398109243697478,
|
|
"grad_norm": 8.926186371349456,
|
|
"learning_rate": 3.908687316162178e-06,
|
|
"loss": 1.581723928451538,
|
|
"step": 3503
|
|
},
|
|
{
|
|
"epoch": 1.8403361344537816,
|
|
"grad_norm": 11.629609827402959,
|
|
"learning_rate": 3.905705176674736e-06,
|
|
"loss": 0.8478154540061951,
|
|
"step": 3504
|
|
},
|
|
{
|
|
"epoch": 1.840861344537815,
|
|
"grad_norm": 16.913352119962365,
|
|
"learning_rate": 3.902723445984425e-06,
|
|
"loss": 1.2822133302688599,
|
|
"step": 3505
|
|
},
|
|
{
|
|
"epoch": 1.8413865546218489,
|
|
"grad_norm": 10.084626311786446,
|
|
"learning_rate": 3.899742125205135e-06,
|
|
"loss": 1.6672027111053467,
|
|
"step": 3506
|
|
},
|
|
{
|
|
"epoch": 1.8419117647058822,
|
|
"grad_norm": 17.855317629102217,
|
|
"learning_rate": 3.896761215450598e-06,
|
|
"loss": 1.7304069995880127,
|
|
"step": 3507
|
|
},
|
|
{
|
|
"epoch": 1.842436974789916,
|
|
"grad_norm": 11.73475424279721,
|
|
"learning_rate": 3.8937807178344004e-06,
|
|
"loss": 1.579456090927124,
|
|
"step": 3508
|
|
},
|
|
{
|
|
"epoch": 1.8429621848739495,
|
|
"grad_norm": 9.635595518117883,
|
|
"learning_rate": 3.890800633469968e-06,
|
|
"loss": 2.3471951484680176,
|
|
"step": 3509
|
|
},
|
|
{
|
|
"epoch": 1.8434873949579833,
|
|
"grad_norm": 18.969351341207172,
|
|
"learning_rate": 3.887820963470575e-06,
|
|
"loss": 2.481104850769043,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 1.8440126050420167,
|
|
"grad_norm": 14.561536676382035,
|
|
"learning_rate": 3.8848417089493416e-06,
|
|
"loss": 1.7346417903900146,
|
|
"step": 3511
|
|
},
|
|
{
|
|
"epoch": 1.8445378151260505,
|
|
"grad_norm": 14.412763962717026,
|
|
"learning_rate": 3.881862871019232e-06,
|
|
"loss": 1.4879249334335327,
|
|
"step": 3512
|
|
},
|
|
{
|
|
"epoch": 1.845063025210084,
|
|
"grad_norm": 12.01648701659342,
|
|
"learning_rate": 3.878884450793053e-06,
|
|
"loss": 2.0206332206726074,
|
|
"step": 3513
|
|
},
|
|
{
|
|
"epoch": 1.8455882352941178,
|
|
"grad_norm": 10.921786140963496,
|
|
"learning_rate": 3.875906449383457e-06,
|
|
"loss": 1.617268681526184,
|
|
"step": 3514
|
|
},
|
|
{
|
|
"epoch": 1.8461134453781511,
|
|
"grad_norm": 13.363315665679083,
|
|
"learning_rate": 3.872928867902941e-06,
|
|
"loss": 1.9489585161209106,
|
|
"step": 3515
|
|
},
|
|
{
|
|
"epoch": 1.846638655462185,
|
|
"grad_norm": 20.84039375418646,
|
|
"learning_rate": 3.869951707463844e-06,
|
|
"loss": 1.4352951049804688,
|
|
"step": 3516
|
|
},
|
|
{
|
|
"epoch": 1.8471638655462184,
|
|
"grad_norm": 11.08121858346143,
|
|
"learning_rate": 3.866974969178348e-06,
|
|
"loss": 2.229334592819214,
|
|
"step": 3517
|
|
},
|
|
{
|
|
"epoch": 1.8476890756302522,
|
|
"grad_norm": 9.248862539143877,
|
|
"learning_rate": 3.863998654158473e-06,
|
|
"loss": 1.4621502161026,
|
|
"step": 3518
|
|
},
|
|
{
|
|
"epoch": 1.8482142857142856,
|
|
"grad_norm": 16.4704502139439,
|
|
"learning_rate": 3.861022763516091e-06,
|
|
"loss": 2.312044143676758,
|
|
"step": 3519
|
|
},
|
|
{
|
|
"epoch": 1.8487394957983194,
|
|
"grad_norm": 8.787902743848047,
|
|
"learning_rate": 3.858047298362905e-06,
|
|
"loss": 1.7228615283966064,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 1.8492647058823528,
|
|
"grad_norm": 9.96677635268189,
|
|
"learning_rate": 3.855072259810465e-06,
|
|
"loss": 1.3734462261199951,
|
|
"step": 3521
|
|
},
|
|
{
|
|
"epoch": 1.8497899159663866,
|
|
"grad_norm": 10.009262130324313,
|
|
"learning_rate": 3.852097648970159e-06,
|
|
"loss": 2.19566011428833,
|
|
"step": 3522
|
|
},
|
|
{
|
|
"epoch": 1.85031512605042,
|
|
"grad_norm": 7.524826516500948,
|
|
"learning_rate": 3.849123466953217e-06,
|
|
"loss": 1.8094148635864258,
|
|
"step": 3523
|
|
},
|
|
{
|
|
"epoch": 1.8508403361344539,
|
|
"grad_norm": 6.928065305860581,
|
|
"learning_rate": 3.846149714870709e-06,
|
|
"loss": 1.797644019126892,
|
|
"step": 3524
|
|
},
|
|
{
|
|
"epoch": 1.8513655462184873,
|
|
"grad_norm": 8.994073872711708,
|
|
"learning_rate": 3.8431763938335415e-06,
|
|
"loss": 1.77252197265625,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 1.851890756302521,
|
|
"grad_norm": 12.96452628798218,
|
|
"learning_rate": 3.840203504952462e-06,
|
|
"loss": 1.1664371490478516,
|
|
"step": 3526
|
|
},
|
|
{
|
|
"epoch": 1.8524159663865545,
|
|
"grad_norm": 18.83396205571419,
|
|
"learning_rate": 3.837231049338057e-06,
|
|
"loss": 1.7924728393554688,
|
|
"step": 3527
|
|
},
|
|
{
|
|
"epoch": 1.8529411764705883,
|
|
"grad_norm": 15.45289613980103,
|
|
"learning_rate": 3.834259028100753e-06,
|
|
"loss": 1.0718178749084473,
|
|
"step": 3528
|
|
},
|
|
{
|
|
"epoch": 1.8534663865546217,
|
|
"grad_norm": 16.842392788992143,
|
|
"learning_rate": 3.831287442350806e-06,
|
|
"loss": 1.0396767854690552,
|
|
"step": 3529
|
|
},
|
|
{
|
|
"epoch": 1.8539915966386555,
|
|
"grad_norm": 8.299756594769104,
|
|
"learning_rate": 3.828316293198321e-06,
|
|
"loss": 1.5213674306869507,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 1.854516806722689,
|
|
"grad_norm": 20.20417577357553,
|
|
"learning_rate": 3.8253455817532305e-06,
|
|
"loss": 1.7097625732421875,
|
|
"step": 3531
|
|
},
|
|
{
|
|
"epoch": 1.8550420168067228,
|
|
"grad_norm": 6.797592140416188,
|
|
"learning_rate": 3.822375309125309e-06,
|
|
"loss": 1.6455717086791992,
|
|
"step": 3532
|
|
},
|
|
{
|
|
"epoch": 1.8555672268907561,
|
|
"grad_norm": 19.2025588245244,
|
|
"learning_rate": 3.819405476424164e-06,
|
|
"loss": 1.8461918830871582,
|
|
"step": 3533
|
|
},
|
|
{
|
|
"epoch": 1.85609243697479,
|
|
"grad_norm": 9.984185122289594,
|
|
"learning_rate": 3.816436084759239e-06,
|
|
"loss": 1.5979926586151123,
|
|
"step": 3534
|
|
},
|
|
{
|
|
"epoch": 1.8566176470588234,
|
|
"grad_norm": 11.212003212034826,
|
|
"learning_rate": 3.8134671352398157e-06,
|
|
"loss": 2.200836181640625,
|
|
"step": 3535
|
|
},
|
|
{
|
|
"epoch": 1.8571428571428572,
|
|
"grad_norm": 7.719172038204905,
|
|
"learning_rate": 3.810498628975007e-06,
|
|
"loss": 1.2535037994384766,
|
|
"step": 3536
|
|
},
|
|
{
|
|
"epoch": 1.8576680672268906,
|
|
"grad_norm": 10.78374603181833,
|
|
"learning_rate": 3.8075305670737605e-06,
|
|
"loss": 1.2252781391143799,
|
|
"step": 3537
|
|
},
|
|
{
|
|
"epoch": 1.8581932773109244,
|
|
"grad_norm": 21.675118208744227,
|
|
"learning_rate": 3.804562950644861e-06,
|
|
"loss": 1.6327319145202637,
|
|
"step": 3538
|
|
},
|
|
{
|
|
"epoch": 1.8587184873949578,
|
|
"grad_norm": 10.943859360268,
|
|
"learning_rate": 3.8015957807969247e-06,
|
|
"loss": 1.930145502090454,
|
|
"step": 3539
|
|
},
|
|
{
|
|
"epoch": 1.8592436974789917,
|
|
"grad_norm": 10.628317840655198,
|
|
"learning_rate": 3.7986290586383995e-06,
|
|
"loss": 1.1274309158325195,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 1.8597689075630253,
|
|
"grad_norm": 14.694661522647797,
|
|
"learning_rate": 3.795662785277568e-06,
|
|
"loss": 1.229578971862793,
|
|
"step": 3541
|
|
},
|
|
{
|
|
"epoch": 1.8602941176470589,
|
|
"grad_norm": 12.117851440355302,
|
|
"learning_rate": 3.792696961822546e-06,
|
|
"loss": 1.7704070806503296,
|
|
"step": 3542
|
|
},
|
|
{
|
|
"epoch": 1.8608193277310925,
|
|
"grad_norm": 12.57542697006448,
|
|
"learning_rate": 3.7897315893812796e-06,
|
|
"loss": 1.3353325128555298,
|
|
"step": 3543
|
|
},
|
|
{
|
|
"epoch": 1.861344537815126,
|
|
"grad_norm": 9.029109971955679,
|
|
"learning_rate": 3.786766669061545e-06,
|
|
"loss": 1.8322136402130127,
|
|
"step": 3544
|
|
},
|
|
{
|
|
"epoch": 1.8618697478991597,
|
|
"grad_norm": 12.622751677546477,
|
|
"learning_rate": 3.783802201970953e-06,
|
|
"loss": 1.0906827449798584,
|
|
"step": 3545
|
|
},
|
|
{
|
|
"epoch": 1.8623949579831933,
|
|
"grad_norm": 8.854469617171919,
|
|
"learning_rate": 3.780838189216943e-06,
|
|
"loss": 1.5446804761886597,
|
|
"step": 3546
|
|
},
|
|
{
|
|
"epoch": 1.862920168067227,
|
|
"grad_norm": 14.372875903305284,
|
|
"learning_rate": 3.7778746319067867e-06,
|
|
"loss": 2.2716293334960938,
|
|
"step": 3547
|
|
},
|
|
{
|
|
"epoch": 1.8634453781512605,
|
|
"grad_norm": 13.435472401663668,
|
|
"learning_rate": 3.774911531147582e-06,
|
|
"loss": 1.5395889282226562,
|
|
"step": 3548
|
|
},
|
|
{
|
|
"epoch": 1.8639705882352942,
|
|
"grad_norm": 15.00366731481477,
|
|
"learning_rate": 3.7719488880462596e-06,
|
|
"loss": 1.8546674251556396,
|
|
"step": 3549
|
|
},
|
|
{
|
|
"epoch": 1.8644957983193278,
|
|
"grad_norm": 11.27849678903175,
|
|
"learning_rate": 3.7689867037095756e-06,
|
|
"loss": 1.7650783061981201,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 1.8650210084033614,
|
|
"grad_norm": 10.62881995178189,
|
|
"learning_rate": 3.7660249792441197e-06,
|
|
"loss": 1.1084260940551758,
|
|
"step": 3551
|
|
},
|
|
{
|
|
"epoch": 1.865546218487395,
|
|
"grad_norm": 8.679418209805792,
|
|
"learning_rate": 3.763063715756306e-06,
|
|
"loss": 1.3064913749694824,
|
|
"step": 3552
|
|
},
|
|
{
|
|
"epoch": 1.8660714285714286,
|
|
"grad_norm": 9.825975361259932,
|
|
"learning_rate": 3.7601029143523767e-06,
|
|
"loss": 1.6261317729949951,
|
|
"step": 3553
|
|
},
|
|
{
|
|
"epoch": 1.8665966386554622,
|
|
"grad_norm": 13.022200849444895,
|
|
"learning_rate": 3.7571425761384038e-06,
|
|
"loss": 1.4149929285049438,
|
|
"step": 3554
|
|
},
|
|
{
|
|
"epoch": 1.8671218487394958,
|
|
"grad_norm": 9.208151871527573,
|
|
"learning_rate": 3.7541827022202838e-06,
|
|
"loss": 1.4480232000350952,
|
|
"step": 3555
|
|
},
|
|
{
|
|
"epoch": 1.8676470588235294,
|
|
"grad_norm": 20.116410208061595,
|
|
"learning_rate": 3.751223293703741e-06,
|
|
"loss": 1.8733075857162476,
|
|
"step": 3556
|
|
},
|
|
{
|
|
"epoch": 1.868172268907563,
|
|
"grad_norm": 10.950879834930689,
|
|
"learning_rate": 3.748264351694324e-06,
|
|
"loss": 1.5732142925262451,
|
|
"step": 3557
|
|
},
|
|
{
|
|
"epoch": 1.8686974789915967,
|
|
"grad_norm": 14.323168406084772,
|
|
"learning_rate": 3.7453058772974115e-06,
|
|
"loss": 2.260406732559204,
|
|
"step": 3558
|
|
},
|
|
{
|
|
"epoch": 1.8692226890756303,
|
|
"grad_norm": 12.112730133012303,
|
|
"learning_rate": 3.7423478716182026e-06,
|
|
"loss": 1.5210075378417969,
|
|
"step": 3559
|
|
},
|
|
{
|
|
"epoch": 1.8697478991596639,
|
|
"grad_norm": 9.728691238390134,
|
|
"learning_rate": 3.7393903357617235e-06,
|
|
"loss": 1.485840916633606,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 1.8702731092436975,
|
|
"grad_norm": 7.182419658455595,
|
|
"learning_rate": 3.7364332708328232e-06,
|
|
"loss": 1.4217307567596436,
|
|
"step": 3561
|
|
},
|
|
{
|
|
"epoch": 1.870798319327731,
|
|
"grad_norm": 17.93417450916989,
|
|
"learning_rate": 3.7334766779361797e-06,
|
|
"loss": 1.7334325313568115,
|
|
"step": 3562
|
|
},
|
|
{
|
|
"epoch": 1.8713235294117647,
|
|
"grad_norm": 8.933173598649242,
|
|
"learning_rate": 3.7305205581762895e-06,
|
|
"loss": 1.5833280086517334,
|
|
"step": 3563
|
|
},
|
|
{
|
|
"epoch": 1.8718487394957983,
|
|
"grad_norm": 11.260953763552077,
|
|
"learning_rate": 3.727564912657472e-06,
|
|
"loss": 1.8907787799835205,
|
|
"step": 3564
|
|
},
|
|
{
|
|
"epoch": 1.872373949579832,
|
|
"grad_norm": 15.444795603802612,
|
|
"learning_rate": 3.7246097424838746e-06,
|
|
"loss": 1.2487103939056396,
|
|
"step": 3565
|
|
},
|
|
{
|
|
"epoch": 1.8728991596638656,
|
|
"grad_norm": 20.851161433135722,
|
|
"learning_rate": 3.721655048759464e-06,
|
|
"loss": 1.5314972400665283,
|
|
"step": 3566
|
|
},
|
|
{
|
|
"epoch": 1.8734243697478992,
|
|
"grad_norm": 10.964787905031088,
|
|
"learning_rate": 3.718700832588027e-06,
|
|
"loss": 1.34340238571167,
|
|
"step": 3567
|
|
},
|
|
{
|
|
"epoch": 1.8739495798319328,
|
|
"grad_norm": 13.500889527296094,
|
|
"learning_rate": 3.715747095073173e-06,
|
|
"loss": 1.7862898111343384,
|
|
"step": 3568
|
|
},
|
|
{
|
|
"epoch": 1.8744747899159664,
|
|
"grad_norm": 8.227690269926454,
|
|
"learning_rate": 3.712793837318338e-06,
|
|
"loss": 1.5169625282287598,
|
|
"step": 3569
|
|
},
|
|
{
|
|
"epoch": 1.875,
|
|
"grad_norm": 11.56621403964026,
|
|
"learning_rate": 3.709841060426771e-06,
|
|
"loss": 2.2738161087036133,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 1.8755252100840336,
|
|
"grad_norm": 22.653913600727332,
|
|
"learning_rate": 3.706888765501545e-06,
|
|
"loss": 1.3991615772247314,
|
|
"step": 3571
|
|
},
|
|
{
|
|
"epoch": 1.8760504201680672,
|
|
"grad_norm": 9.38772465128983,
|
|
"learning_rate": 3.7039369536455525e-06,
|
|
"loss": 1.4856879711151123,
|
|
"step": 3572
|
|
},
|
|
{
|
|
"epoch": 1.8765756302521008,
|
|
"grad_norm": 12.572089255662306,
|
|
"learning_rate": 3.7009856259615074e-06,
|
|
"loss": 1.2467734813690186,
|
|
"step": 3573
|
|
},
|
|
{
|
|
"epoch": 1.8771008403361344,
|
|
"grad_norm": 12.134114511683737,
|
|
"learning_rate": 3.698034783551939e-06,
|
|
"loss": 1.200655460357666,
|
|
"step": 3574
|
|
},
|
|
{
|
|
"epoch": 1.877626050420168,
|
|
"grad_norm": 10.027598369525876,
|
|
"learning_rate": 3.6950844275191973e-06,
|
|
"loss": 1.10594642162323,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 1.8781512605042017,
|
|
"grad_norm": 13.63807690445431,
|
|
"learning_rate": 3.6921345589654524e-06,
|
|
"loss": 1.6851091384887695,
|
|
"step": 3576
|
|
},
|
|
{
|
|
"epoch": 1.8786764705882353,
|
|
"grad_norm": 15.168541909229665,
|
|
"learning_rate": 3.689185178992689e-06,
|
|
"loss": 1.5337399244308472,
|
|
"step": 3577
|
|
},
|
|
{
|
|
"epoch": 1.879201680672269,
|
|
"grad_norm": 12.826929763580086,
|
|
"learning_rate": 3.686236288702712e-06,
|
|
"loss": 1.040381908416748,
|
|
"step": 3578
|
|
},
|
|
{
|
|
"epoch": 1.8797268907563025,
|
|
"grad_norm": 5.720647829190294,
|
|
"learning_rate": 3.68328788919714e-06,
|
|
"loss": 1.0834476947784424,
|
|
"step": 3579
|
|
},
|
|
{
|
|
"epoch": 1.8802521008403361,
|
|
"grad_norm": 7.91033875802829,
|
|
"learning_rate": 3.6803399815774133e-06,
|
|
"loss": 1.9564454555511475,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 1.8807773109243697,
|
|
"grad_norm": 9.930278788362486,
|
|
"learning_rate": 3.677392566944783e-06,
|
|
"loss": 1.725341558456421,
|
|
"step": 3581
|
|
},
|
|
{
|
|
"epoch": 1.8813025210084033,
|
|
"grad_norm": 9.461795901718977,
|
|
"learning_rate": 3.674445646400321e-06,
|
|
"loss": 1.4589251279830933,
|
|
"step": 3582
|
|
},
|
|
{
|
|
"epoch": 1.881827731092437,
|
|
"grad_norm": 12.069724162160428,
|
|
"learning_rate": 3.6714992210449084e-06,
|
|
"loss": 1.4926224946975708,
|
|
"step": 3583
|
|
},
|
|
{
|
|
"epoch": 1.8823529411764706,
|
|
"grad_norm": 6.813433444255876,
|
|
"learning_rate": 3.66855329197925e-06,
|
|
"loss": 0.6729644536972046,
|
|
"step": 3584
|
|
},
|
|
{
|
|
"epoch": 1.8828781512605042,
|
|
"grad_norm": 12.39938011433959,
|
|
"learning_rate": 3.665607860303857e-06,
|
|
"loss": 1.424481749534607,
|
|
"step": 3585
|
|
},
|
|
{
|
|
"epoch": 1.8834033613445378,
|
|
"grad_norm": 12.043233977310381,
|
|
"learning_rate": 3.6626629271190594e-06,
|
|
"loss": 1.4678826332092285,
|
|
"step": 3586
|
|
},
|
|
{
|
|
"epoch": 1.8839285714285714,
|
|
"grad_norm": 17.906264162364973,
|
|
"learning_rate": 3.6597184935249986e-06,
|
|
"loss": 1.7656259536743164,
|
|
"step": 3587
|
|
},
|
|
{
|
|
"epoch": 1.884453781512605,
|
|
"grad_norm": 10.028723828386402,
|
|
"learning_rate": 3.656774560621632e-06,
|
|
"loss": 1.4614261388778687,
|
|
"step": 3588
|
|
},
|
|
{
|
|
"epoch": 1.8849789915966386,
|
|
"grad_norm": 10.62126280003491,
|
|
"learning_rate": 3.653831129508727e-06,
|
|
"loss": 1.4474071264266968,
|
|
"step": 3589
|
|
},
|
|
{
|
|
"epoch": 1.8855042016806722,
|
|
"grad_norm": 11.587557348981067,
|
|
"learning_rate": 3.6508882012858647e-06,
|
|
"loss": 0.8009591102600098,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 1.8860294117647058,
|
|
"grad_norm": 9.23527206371919,
|
|
"learning_rate": 3.6479457770524413e-06,
|
|
"loss": 1.29331636428833,
|
|
"step": 3591
|
|
},
|
|
{
|
|
"epoch": 1.8865546218487395,
|
|
"grad_norm": 11.342049018828623,
|
|
"learning_rate": 3.6450038579076595e-06,
|
|
"loss": 1.700790286064148,
|
|
"step": 3592
|
|
},
|
|
{
|
|
"epoch": 1.887079831932773,
|
|
"grad_norm": 12.725170037155792,
|
|
"learning_rate": 3.642062444950537e-06,
|
|
"loss": 1.4710191488265991,
|
|
"step": 3593
|
|
},
|
|
{
|
|
"epoch": 1.8876050420168067,
|
|
"grad_norm": 19.69353183373372,
|
|
"learning_rate": 3.6391215392798994e-06,
|
|
"loss": 1.856687068939209,
|
|
"step": 3594
|
|
},
|
|
{
|
|
"epoch": 1.8881302521008403,
|
|
"grad_norm": 12.660195112849834,
|
|
"learning_rate": 3.636181141994387e-06,
|
|
"loss": 1.7518823146820068,
|
|
"step": 3595
|
|
},
|
|
{
|
|
"epoch": 1.888655462184874,
|
|
"grad_norm": 13.469637240764241,
|
|
"learning_rate": 3.6332412541924473e-06,
|
|
"loss": 2.629429340362549,
|
|
"step": 3596
|
|
},
|
|
{
|
|
"epoch": 1.8891806722689075,
|
|
"grad_norm": 14.229616913477427,
|
|
"learning_rate": 3.630301876972337e-06,
|
|
"loss": 1.3876407146453857,
|
|
"step": 3597
|
|
},
|
|
{
|
|
"epoch": 1.8897058823529411,
|
|
"grad_norm": 11.029448677979465,
|
|
"learning_rate": 3.6273630114321223e-06,
|
|
"loss": 2.240974187850952,
|
|
"step": 3598
|
|
},
|
|
{
|
|
"epoch": 1.8902310924369747,
|
|
"grad_norm": 10.6206510207668,
|
|
"learning_rate": 3.624424658669682e-06,
|
|
"loss": 1.4731332063674927,
|
|
"step": 3599
|
|
},
|
|
{
|
|
"epoch": 1.8907563025210083,
|
|
"grad_norm": 12.444114612667324,
|
|
"learning_rate": 3.621486819782698e-06,
|
|
"loss": 1.0779138803482056,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 1.8912815126050422,
|
|
"grad_norm": 8.349489876337945,
|
|
"learning_rate": 3.618549495868662e-06,
|
|
"loss": 1.559354305267334,
|
|
"step": 3601
|
|
},
|
|
{
|
|
"epoch": 1.8918067226890756,
|
|
"grad_norm": 8.863323426779825,
|
|
"learning_rate": 3.6156126880248765e-06,
|
|
"loss": 1.8395235538482666,
|
|
"step": 3602
|
|
},
|
|
{
|
|
"epoch": 1.8923319327731094,
|
|
"grad_norm": 15.85107512087656,
|
|
"learning_rate": 3.612676397348447e-06,
|
|
"loss": 1.1617748737335205,
|
|
"step": 3603
|
|
},
|
|
{
|
|
"epoch": 1.8928571428571428,
|
|
"grad_norm": 15.80706900262539,
|
|
"learning_rate": 3.6097406249362877e-06,
|
|
"loss": 1.8298149108886719,
|
|
"step": 3604
|
|
},
|
|
{
|
|
"epoch": 1.8933823529411766,
|
|
"grad_norm": 8.974253810745306,
|
|
"learning_rate": 3.606805371885117e-06,
|
|
"loss": 1.0484018325805664,
|
|
"step": 3605
|
|
},
|
|
{
|
|
"epoch": 1.89390756302521,
|
|
"grad_norm": 11.802935511606998,
|
|
"learning_rate": 3.6038706392914647e-06,
|
|
"loss": 1.3665120601654053,
|
|
"step": 3606
|
|
},
|
|
{
|
|
"epoch": 1.8944327731092439,
|
|
"grad_norm": 12.486120252985891,
|
|
"learning_rate": 3.60093642825166e-06,
|
|
"loss": 1.7603936195373535,
|
|
"step": 3607
|
|
},
|
|
{
|
|
"epoch": 1.8949579831932772,
|
|
"grad_norm": 10.979063442675901,
|
|
"learning_rate": 3.598002739861841e-06,
|
|
"loss": 1.2447429895401,
|
|
"step": 3608
|
|
},
|
|
{
|
|
"epoch": 1.895483193277311,
|
|
"grad_norm": 11.24906092027409,
|
|
"learning_rate": 3.5950695752179487e-06,
|
|
"loss": 2.0604703426361084,
|
|
"step": 3609
|
|
},
|
|
{
|
|
"epoch": 1.8960084033613445,
|
|
"grad_norm": 9.453625736292222,
|
|
"learning_rate": 3.5921369354157303e-06,
|
|
"loss": 1.775797724723816,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 1.8965336134453783,
|
|
"grad_norm": 7.1879898781777785,
|
|
"learning_rate": 3.5892048215507356e-06,
|
|
"loss": 1.567115306854248,
|
|
"step": 3611
|
|
},
|
|
{
|
|
"epoch": 1.8970588235294117,
|
|
"grad_norm": 5.651610017591024,
|
|
"learning_rate": 3.5862732347183165e-06,
|
|
"loss": 1.0143564939498901,
|
|
"step": 3612
|
|
},
|
|
{
|
|
"epoch": 1.8975840336134455,
|
|
"grad_norm": 8.618979134107734,
|
|
"learning_rate": 3.5833421760136323e-06,
|
|
"loss": 2.064939022064209,
|
|
"step": 3613
|
|
},
|
|
{
|
|
"epoch": 1.898109243697479,
|
|
"grad_norm": 10.291790714204135,
|
|
"learning_rate": 3.580411646531641e-06,
|
|
"loss": 1.848331093788147,
|
|
"step": 3614
|
|
},
|
|
{
|
|
"epoch": 1.8986344537815127,
|
|
"grad_norm": 7.4177590835040395,
|
|
"learning_rate": 3.5774816473671037e-06,
|
|
"loss": 1.7782447338104248,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 1.8991596638655461,
|
|
"grad_norm": 10.985342350821906,
|
|
"learning_rate": 3.574552179614584e-06,
|
|
"loss": 1.1551049947738647,
|
|
"step": 3616
|
|
},
|
|
{
|
|
"epoch": 1.89968487394958,
|
|
"grad_norm": 12.559700565722542,
|
|
"learning_rate": 3.5716232443684486e-06,
|
|
"loss": 1.6754170656204224,
|
|
"step": 3617
|
|
},
|
|
{
|
|
"epoch": 1.9002100840336134,
|
|
"grad_norm": 10.86033585411204,
|
|
"learning_rate": 3.568694842722863e-06,
|
|
"loss": 1.5450774431228638,
|
|
"step": 3618
|
|
},
|
|
{
|
|
"epoch": 1.9007352941176472,
|
|
"grad_norm": 11.226671306631834,
|
|
"learning_rate": 3.5657669757717927e-06,
|
|
"loss": 2.153855323791504,
|
|
"step": 3619
|
|
},
|
|
{
|
|
"epoch": 1.9012605042016806,
|
|
"grad_norm": 21.87746772998472,
|
|
"learning_rate": 3.562839644609005e-06,
|
|
"loss": 1.87334406375885,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 1.9017857142857144,
|
|
"grad_norm": 8.756738357423524,
|
|
"learning_rate": 3.559912850328069e-06,
|
|
"loss": 1.6386264562606812,
|
|
"step": 3621
|
|
},
|
|
{
|
|
"epoch": 1.9023109243697478,
|
|
"grad_norm": 7.400197604584446,
|
|
"learning_rate": 3.5569865940223492e-06,
|
|
"loss": 0.8230345249176025,
|
|
"step": 3622
|
|
},
|
|
{
|
|
"epoch": 1.9028361344537816,
|
|
"grad_norm": 12.103140883663276,
|
|
"learning_rate": 3.5540608767850106e-06,
|
|
"loss": 1.4310319423675537,
|
|
"step": 3623
|
|
},
|
|
{
|
|
"epoch": 1.903361344537815,
|
|
"grad_norm": 7.910084852656171,
|
|
"learning_rate": 3.5511356997090176e-06,
|
|
"loss": 1.5238559246063232,
|
|
"step": 3624
|
|
},
|
|
{
|
|
"epoch": 1.9038865546218489,
|
|
"grad_norm": 13.138582623231445,
|
|
"learning_rate": 3.5482110638871325e-06,
|
|
"loss": 1.183509111404419,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 1.9044117647058822,
|
|
"grad_norm": 9.591732145752783,
|
|
"learning_rate": 3.5452869704119156e-06,
|
|
"loss": 1.066697120666504,
|
|
"step": 3626
|
|
},
|
|
{
|
|
"epoch": 1.904936974789916,
|
|
"grad_norm": 12.416841644173525,
|
|
"learning_rate": 3.5423634203757235e-06,
|
|
"loss": 1.1212185621261597,
|
|
"step": 3627
|
|
},
|
|
{
|
|
"epoch": 1.9054621848739495,
|
|
"grad_norm": 8.640970310864347,
|
|
"learning_rate": 3.539440414870712e-06,
|
|
"loss": 1.0890882015228271,
|
|
"step": 3628
|
|
},
|
|
{
|
|
"epoch": 1.9059873949579833,
|
|
"grad_norm": 14.555649963630264,
|
|
"learning_rate": 3.5365179549888306e-06,
|
|
"loss": 1.2889267206192017,
|
|
"step": 3629
|
|
},
|
|
{
|
|
"epoch": 1.9065126050420167,
|
|
"grad_norm": 11.4173030386502,
|
|
"learning_rate": 3.5335960418218272e-06,
|
|
"loss": 1.409904956817627,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 1.9070378151260505,
|
|
"grad_norm": 8.089996616310701,
|
|
"learning_rate": 3.5306746764612433e-06,
|
|
"loss": 2.159165143966675,
|
|
"step": 3631
|
|
},
|
|
{
|
|
"epoch": 1.907563025210084,
|
|
"grad_norm": 15.391522922837247,
|
|
"learning_rate": 3.527753859998419e-06,
|
|
"loss": 2.1786513328552246,
|
|
"step": 3632
|
|
},
|
|
{
|
|
"epoch": 1.9080882352941178,
|
|
"grad_norm": 13.936825236031506,
|
|
"learning_rate": 3.524833593524487e-06,
|
|
"loss": 1.3346937894821167,
|
|
"step": 3633
|
|
},
|
|
{
|
|
"epoch": 1.9086134453781511,
|
|
"grad_norm": 8.008434850996032,
|
|
"learning_rate": 3.5219138781303743e-06,
|
|
"loss": 1.6728880405426025,
|
|
"step": 3634
|
|
},
|
|
{
|
|
"epoch": 1.909138655462185,
|
|
"grad_norm": 8.091000502435186,
|
|
"learning_rate": 3.5189947149068028e-06,
|
|
"loss": 1.7466754913330078,
|
|
"step": 3635
|
|
},
|
|
{
|
|
"epoch": 1.9096638655462184,
|
|
"grad_norm": 14.247421381338212,
|
|
"learning_rate": 3.5160761049442887e-06,
|
|
"loss": 2.038893461227417,
|
|
"step": 3636
|
|
},
|
|
{
|
|
"epoch": 1.9101890756302522,
|
|
"grad_norm": 26.21073194103894,
|
|
"learning_rate": 3.513158049333141e-06,
|
|
"loss": 1.2838835716247559,
|
|
"step": 3637
|
|
},
|
|
{
|
|
"epoch": 1.9107142857142856,
|
|
"grad_norm": 21.439346341804875,
|
|
"learning_rate": 3.51024054916346e-06,
|
|
"loss": 2.3123278617858887,
|
|
"step": 3638
|
|
},
|
|
{
|
|
"epoch": 1.9112394957983194,
|
|
"grad_norm": 14.333166847254372,
|
|
"learning_rate": 3.5073236055251425e-06,
|
|
"loss": 1.8338489532470703,
|
|
"step": 3639
|
|
},
|
|
{
|
|
"epoch": 1.9117647058823528,
|
|
"grad_norm": 16.659338932005724,
|
|
"learning_rate": 3.504407219507873e-06,
|
|
"loss": 1.7070486545562744,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 1.9122899159663866,
|
|
"grad_norm": 7.982948890385432,
|
|
"learning_rate": 3.50149139220113e-06,
|
|
"loss": 0.487301766872406,
|
|
"step": 3641
|
|
},
|
|
{
|
|
"epoch": 1.91281512605042,
|
|
"grad_norm": 9.460227929108488,
|
|
"learning_rate": 3.4985761246941825e-06,
|
|
"loss": 2.13903546333313,
|
|
"step": 3642
|
|
},
|
|
{
|
|
"epoch": 1.9133403361344539,
|
|
"grad_norm": 15.804121583915196,
|
|
"learning_rate": 3.4956614180760918e-06,
|
|
"loss": 1.2893824577331543,
|
|
"step": 3643
|
|
},
|
|
{
|
|
"epoch": 1.9138655462184873,
|
|
"grad_norm": 12.95136446520394,
|
|
"learning_rate": 3.492747273435708e-06,
|
|
"loss": 2.1587581634521484,
|
|
"step": 3644
|
|
},
|
|
{
|
|
"epoch": 1.914390756302521,
|
|
"grad_norm": 9.512014326052435,
|
|
"learning_rate": 3.4898336918616726e-06,
|
|
"loss": 1.3404757976531982,
|
|
"step": 3645
|
|
},
|
|
{
|
|
"epoch": 1.9149159663865545,
|
|
"grad_norm": 10.877528867749788,
|
|
"learning_rate": 3.4869206744424145e-06,
|
|
"loss": 1.6557071208953857,
|
|
"step": 3646
|
|
},
|
|
{
|
|
"epoch": 1.9154411764705883,
|
|
"grad_norm": 15.992389857564886,
|
|
"learning_rate": 3.4840082222661543e-06,
|
|
"loss": 1.7658555507659912,
|
|
"step": 3647
|
|
},
|
|
{
|
|
"epoch": 1.9159663865546217,
|
|
"grad_norm": 16.751926277390957,
|
|
"learning_rate": 3.481096336420901e-06,
|
|
"loss": 1.9302045106887817,
|
|
"step": 3648
|
|
},
|
|
{
|
|
"epoch": 1.9164915966386555,
|
|
"grad_norm": 11.168683662508885,
|
|
"learning_rate": 3.47818501799445e-06,
|
|
"loss": 1.350619912147522,
|
|
"step": 3649
|
|
},
|
|
{
|
|
"epoch": 1.917016806722689,
|
|
"grad_norm": 10.95598176963718,
|
|
"learning_rate": 3.475274268074388e-06,
|
|
"loss": 1.9971766471862793,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 1.9175420168067228,
|
|
"grad_norm": 8.588886644464463,
|
|
"learning_rate": 3.4723640877480875e-06,
|
|
"loss": 1.612987756729126,
|
|
"step": 3651
|
|
},
|
|
{
|
|
"epoch": 1.9180672268907561,
|
|
"grad_norm": 7.961264639918314,
|
|
"learning_rate": 3.4694544781027072e-06,
|
|
"loss": 1.1771678924560547,
|
|
"step": 3652
|
|
},
|
|
{
|
|
"epoch": 1.91859243697479,
|
|
"grad_norm": 16.40130897804457,
|
|
"learning_rate": 3.4665454402251937e-06,
|
|
"loss": 1.7718167304992676,
|
|
"step": 3653
|
|
},
|
|
{
|
|
"epoch": 1.9191176470588234,
|
|
"grad_norm": 15.179397950212735,
|
|
"learning_rate": 3.4636369752022814e-06,
|
|
"loss": 2.4633116722106934,
|
|
"step": 3654
|
|
},
|
|
{
|
|
"epoch": 1.9196428571428572,
|
|
"grad_norm": 16.019901833368834,
|
|
"learning_rate": 3.460729084120488e-06,
|
|
"loss": 1.489286184310913,
|
|
"step": 3655
|
|
},
|
|
{
|
|
"epoch": 1.9201680672268906,
|
|
"grad_norm": 9.79445114832196,
|
|
"learning_rate": 3.4578217680661197e-06,
|
|
"loss": 0.9999767541885376,
|
|
"step": 3656
|
|
},
|
|
{
|
|
"epoch": 1.9206932773109244,
|
|
"grad_norm": 12.70204532003881,
|
|
"learning_rate": 3.4549150281252635e-06,
|
|
"loss": 1.3212666511535645,
|
|
"step": 3657
|
|
},
|
|
{
|
|
"epoch": 1.9212184873949578,
|
|
"grad_norm": 10.65781127575427,
|
|
"learning_rate": 3.452008865383797e-06,
|
|
"loss": 1.320206642150879,
|
|
"step": 3658
|
|
},
|
|
{
|
|
"epoch": 1.9217436974789917,
|
|
"grad_norm": 8.639190660447014,
|
|
"learning_rate": 3.4491032809273784e-06,
|
|
"loss": 0.8942955136299133,
|
|
"step": 3659
|
|
},
|
|
{
|
|
"epoch": 1.9222689075630253,
|
|
"grad_norm": 6.963753783884818,
|
|
"learning_rate": 3.4461982758414492e-06,
|
|
"loss": 1.3711848258972168,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 1.9227941176470589,
|
|
"grad_norm": 9.051633566956067,
|
|
"learning_rate": 3.443293851211237e-06,
|
|
"loss": 1.5953480005264282,
|
|
"step": 3661
|
|
},
|
|
{
|
|
"epoch": 1.9233193277310925,
|
|
"grad_norm": 11.05506440209341,
|
|
"learning_rate": 3.440390008121751e-06,
|
|
"loss": 1.541900634765625,
|
|
"step": 3662
|
|
},
|
|
{
|
|
"epoch": 1.923844537815126,
|
|
"grad_norm": 8.016742561486975,
|
|
"learning_rate": 3.437486747657785e-06,
|
|
"loss": 1.6536552906036377,
|
|
"step": 3663
|
|
},
|
|
{
|
|
"epoch": 1.9243697478991597,
|
|
"grad_norm": 15.532138363683787,
|
|
"learning_rate": 3.4345840709039113e-06,
|
|
"loss": 1.8288671970367432,
|
|
"step": 3664
|
|
},
|
|
{
|
|
"epoch": 1.9248949579831933,
|
|
"grad_norm": 15.628526432519445,
|
|
"learning_rate": 3.4316819789444893e-06,
|
|
"loss": 1.838546872138977,
|
|
"step": 3665
|
|
},
|
|
{
|
|
"epoch": 1.925420168067227,
|
|
"grad_norm": 9.284027106376742,
|
|
"learning_rate": 3.428780472863656e-06,
|
|
"loss": 1.7238937616348267,
|
|
"step": 3666
|
|
},
|
|
{
|
|
"epoch": 1.9259453781512605,
|
|
"grad_norm": 6.815635948477541,
|
|
"learning_rate": 3.4258795537453305e-06,
|
|
"loss": 1.6593295335769653,
|
|
"step": 3667
|
|
},
|
|
{
|
|
"epoch": 1.9264705882352942,
|
|
"grad_norm": 14.041982862594873,
|
|
"learning_rate": 3.4229792226732124e-06,
|
|
"loss": 1.732300043106079,
|
|
"step": 3668
|
|
},
|
|
{
|
|
"epoch": 1.9269957983193278,
|
|
"grad_norm": 8.674614768493917,
|
|
"learning_rate": 3.4200794807307834e-06,
|
|
"loss": 1.424330472946167,
|
|
"step": 3669
|
|
},
|
|
{
|
|
"epoch": 1.9275210084033614,
|
|
"grad_norm": 16.168015564998782,
|
|
"learning_rate": 3.4171803290013038e-06,
|
|
"loss": 1.5260021686553955,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 1.928046218487395,
|
|
"grad_norm": 13.99685376929219,
|
|
"learning_rate": 3.4142817685678128e-06,
|
|
"loss": 1.8458993434906006,
|
|
"step": 3671
|
|
},
|
|
{
|
|
"epoch": 1.9285714285714286,
|
|
"grad_norm": 10.859423445634478,
|
|
"learning_rate": 3.4113838005131285e-06,
|
|
"loss": 0.9052958488464355,
|
|
"step": 3672
|
|
},
|
|
{
|
|
"epoch": 1.9290966386554622,
|
|
"grad_norm": 18.227962265786484,
|
|
"learning_rate": 3.40848642591985e-06,
|
|
"loss": 1.5667917728424072,
|
|
"step": 3673
|
|
},
|
|
{
|
|
"epoch": 1.9296218487394958,
|
|
"grad_norm": 9.098874736763168,
|
|
"learning_rate": 3.405589645870354e-06,
|
|
"loss": 1.3228158950805664,
|
|
"step": 3674
|
|
},
|
|
{
|
|
"epoch": 1.9301470588235294,
|
|
"grad_norm": 13.506506614700806,
|
|
"learning_rate": 3.4026934614467916e-06,
|
|
"loss": 1.3141610622406006,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 1.930672268907563,
|
|
"grad_norm": 15.543212721499522,
|
|
"learning_rate": 3.3997978737310964e-06,
|
|
"loss": 1.7305514812469482,
|
|
"step": 3676
|
|
},
|
|
{
|
|
"epoch": 1.9311974789915967,
|
|
"grad_norm": 12.347081280456088,
|
|
"learning_rate": 3.3969028838049765e-06,
|
|
"loss": 1.2748572826385498,
|
|
"step": 3677
|
|
},
|
|
{
|
|
"epoch": 1.9317226890756303,
|
|
"grad_norm": 12.842481440834034,
|
|
"learning_rate": 3.394008492749917e-06,
|
|
"loss": 1.6242189407348633,
|
|
"step": 3678
|
|
},
|
|
{
|
|
"epoch": 1.9322478991596639,
|
|
"grad_norm": 11.68316370010674,
|
|
"learning_rate": 3.3911147016471784e-06,
|
|
"loss": 1.629981517791748,
|
|
"step": 3679
|
|
},
|
|
{
|
|
"epoch": 1.9327731092436975,
|
|
"grad_norm": 9.315271218726362,
|
|
"learning_rate": 3.3882215115777995e-06,
|
|
"loss": 1.5515351295471191,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 1.933298319327731,
|
|
"grad_norm": 9.92727628547744,
|
|
"learning_rate": 3.3853289236225917e-06,
|
|
"loss": 1.4925764799118042,
|
|
"step": 3681
|
|
},
|
|
{
|
|
"epoch": 1.9338235294117647,
|
|
"grad_norm": 14.633372091900995,
|
|
"learning_rate": 3.3824369388621435e-06,
|
|
"loss": 2.2880353927612305,
|
|
"step": 3682
|
|
},
|
|
{
|
|
"epoch": 1.9343487394957983,
|
|
"grad_norm": 9.454464207090755,
|
|
"learning_rate": 3.379545558376816e-06,
|
|
"loss": 1.442641258239746,
|
|
"step": 3683
|
|
},
|
|
{
|
|
"epoch": 1.934873949579832,
|
|
"grad_norm": 6.088166946496283,
|
|
"learning_rate": 3.376654783246749e-06,
|
|
"loss": 1.7409708499908447,
|
|
"step": 3684
|
|
},
|
|
{
|
|
"epoch": 1.9353991596638656,
|
|
"grad_norm": 9.464198513423897,
|
|
"learning_rate": 3.37376461455185e-06,
|
|
"loss": 2.063614845275879,
|
|
"step": 3685
|
|
},
|
|
{
|
|
"epoch": 1.9359243697478992,
|
|
"grad_norm": 19.974339072441122,
|
|
"learning_rate": 3.3708750533718037e-06,
|
|
"loss": 2.40267014503479,
|
|
"step": 3686
|
|
},
|
|
{
|
|
"epoch": 1.9364495798319328,
|
|
"grad_norm": 15.103949152786987,
|
|
"learning_rate": 3.367986100786069e-06,
|
|
"loss": 1.6338622570037842,
|
|
"step": 3687
|
|
},
|
|
{
|
|
"epoch": 1.9369747899159664,
|
|
"grad_norm": 9.64007048692211,
|
|
"learning_rate": 3.365097757873874e-06,
|
|
"loss": 1.9974913597106934,
|
|
"step": 3688
|
|
},
|
|
{
|
|
"epoch": 1.9375,
|
|
"grad_norm": 11.082012391675923,
|
|
"learning_rate": 3.362210025714222e-06,
|
|
"loss": 1.8873059749603271,
|
|
"step": 3689
|
|
},
|
|
{
|
|
"epoch": 1.9380252100840336,
|
|
"grad_norm": 11.607456697270916,
|
|
"learning_rate": 3.3593229053858846e-06,
|
|
"loss": 1.1598262786865234,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 1.9385504201680672,
|
|
"grad_norm": 13.096992239545292,
|
|
"learning_rate": 3.356436397967409e-06,
|
|
"loss": 1.7868146896362305,
|
|
"step": 3691
|
|
},
|
|
{
|
|
"epoch": 1.9390756302521008,
|
|
"grad_norm": 13.043105799818077,
|
|
"learning_rate": 3.353550504537111e-06,
|
|
"loss": 1.3639363050460815,
|
|
"step": 3692
|
|
},
|
|
{
|
|
"epoch": 1.9396008403361344,
|
|
"grad_norm": 18.45181622734499,
|
|
"learning_rate": 3.350665226173078e-06,
|
|
"loss": 1.4068472385406494,
|
|
"step": 3693
|
|
},
|
|
{
|
|
"epoch": 1.940126050420168,
|
|
"grad_norm": 9.678660726074634,
|
|
"learning_rate": 3.347780563953165e-06,
|
|
"loss": 1.2107820510864258,
|
|
"step": 3694
|
|
},
|
|
{
|
|
"epoch": 1.9406512605042017,
|
|
"grad_norm": 13.992355103663318,
|
|
"learning_rate": 3.344896518955002e-06,
|
|
"loss": 1.444906234741211,
|
|
"step": 3695
|
|
},
|
|
{
|
|
"epoch": 1.9411764705882353,
|
|
"grad_norm": 13.235624185392497,
|
|
"learning_rate": 3.3420130922559848e-06,
|
|
"loss": 2.010563611984253,
|
|
"step": 3696
|
|
},
|
|
{
|
|
"epoch": 1.941701680672269,
|
|
"grad_norm": 12.480438335315137,
|
|
"learning_rate": 3.339130284933276e-06,
|
|
"loss": 1.2893167734146118,
|
|
"step": 3697
|
|
},
|
|
{
|
|
"epoch": 1.9422268907563025,
|
|
"grad_norm": 12.135467993135597,
|
|
"learning_rate": 3.336248098063812e-06,
|
|
"loss": 2.6332337856292725,
|
|
"step": 3698
|
|
},
|
|
{
|
|
"epoch": 1.9427521008403361,
|
|
"grad_norm": 11.024369575537296,
|
|
"learning_rate": 3.3333665327242965e-06,
|
|
"loss": 2.1515727043151855,
|
|
"step": 3699
|
|
},
|
|
{
|
|
"epoch": 1.9432773109243697,
|
|
"grad_norm": 13.106453011431299,
|
|
"learning_rate": 3.330485589991197e-06,
|
|
"loss": 1.2165805101394653,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 1.9438025210084033,
|
|
"grad_norm": 13.85414619376918,
|
|
"learning_rate": 3.327605270940751e-06,
|
|
"loss": 1.3357783555984497,
|
|
"step": 3701
|
|
},
|
|
{
|
|
"epoch": 1.944327731092437,
|
|
"grad_norm": 9.926190940706347,
|
|
"learning_rate": 3.324725576648965e-06,
|
|
"loss": 2.4131393432617188,
|
|
"step": 3702
|
|
},
|
|
{
|
|
"epoch": 1.9448529411764706,
|
|
"grad_norm": 11.354582720037861,
|
|
"learning_rate": 3.321846508191609e-06,
|
|
"loss": 1.106598138809204,
|
|
"step": 3703
|
|
},
|
|
{
|
|
"epoch": 1.9453781512605042,
|
|
"grad_norm": 10.70693421051955,
|
|
"learning_rate": 3.3189680666442205e-06,
|
|
"loss": 1.2720839977264404,
|
|
"step": 3704
|
|
},
|
|
{
|
|
"epoch": 1.9459033613445378,
|
|
"grad_norm": 10.698068482218543,
|
|
"learning_rate": 3.316090253082101e-06,
|
|
"loss": 1.9747552871704102,
|
|
"step": 3705
|
|
},
|
|
{
|
|
"epoch": 1.9464285714285714,
|
|
"grad_norm": 11.248755062109401,
|
|
"learning_rate": 3.31321306858032e-06,
|
|
"loss": 1.2248642444610596,
|
|
"step": 3706
|
|
},
|
|
{
|
|
"epoch": 1.946953781512605,
|
|
"grad_norm": 12.4722586013678,
|
|
"learning_rate": 3.3103365142137128e-06,
|
|
"loss": 1.333898663520813,
|
|
"step": 3707
|
|
},
|
|
{
|
|
"epoch": 1.9474789915966386,
|
|
"grad_norm": 10.862660634097313,
|
|
"learning_rate": 3.307460591056877e-06,
|
|
"loss": 1.921349048614502,
|
|
"step": 3708
|
|
},
|
|
{
|
|
"epoch": 1.9480042016806722,
|
|
"grad_norm": 20.307690767329017,
|
|
"learning_rate": 3.304585300184173e-06,
|
|
"loss": 1.4340145587921143,
|
|
"step": 3709
|
|
},
|
|
{
|
|
"epoch": 1.9485294117647058,
|
|
"grad_norm": 38.48257168199678,
|
|
"learning_rate": 3.3017106426697286e-06,
|
|
"loss": 1.2401962280273438,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 1.9490546218487395,
|
|
"grad_norm": 8.868820101035617,
|
|
"learning_rate": 3.2988366195874335e-06,
|
|
"loss": 1.4671754837036133,
|
|
"step": 3711
|
|
},
|
|
{
|
|
"epoch": 1.949579831932773,
|
|
"grad_norm": 10.470972382528588,
|
|
"learning_rate": 3.2959632320109385e-06,
|
|
"loss": 1.9609804153442383,
|
|
"step": 3712
|
|
},
|
|
{
|
|
"epoch": 1.9501050420168067,
|
|
"grad_norm": 9.193010059542639,
|
|
"learning_rate": 3.293090481013661e-06,
|
|
"loss": 1.9202802181243896,
|
|
"step": 3713
|
|
},
|
|
{
|
|
"epoch": 1.9506302521008403,
|
|
"grad_norm": 10.5872646772965,
|
|
"learning_rate": 3.290218367668775e-06,
|
|
"loss": 1.6255062818527222,
|
|
"step": 3714
|
|
},
|
|
{
|
|
"epoch": 1.951155462184874,
|
|
"grad_norm": 9.625391905012721,
|
|
"learning_rate": 3.2873468930492232e-06,
|
|
"loss": 1.5651859045028687,
|
|
"step": 3715
|
|
},
|
|
{
|
|
"epoch": 1.9516806722689075,
|
|
"grad_norm": 11.509002659147203,
|
|
"learning_rate": 3.2844760582277047e-06,
|
|
"loss": 1.252101182937622,
|
|
"step": 3716
|
|
},
|
|
{
|
|
"epoch": 1.9522058823529411,
|
|
"grad_norm": 11.362263113850556,
|
|
"learning_rate": 3.28160586427668e-06,
|
|
"loss": 1.657231092453003,
|
|
"step": 3717
|
|
},
|
|
{
|
|
"epoch": 1.9527310924369747,
|
|
"grad_norm": 12.492346854863209,
|
|
"learning_rate": 3.2787363122683714e-06,
|
|
"loss": 1.4209221601486206,
|
|
"step": 3718
|
|
},
|
|
{
|
|
"epoch": 1.9532563025210083,
|
|
"grad_norm": 10.78083298459032,
|
|
"learning_rate": 3.275867403274763e-06,
|
|
"loss": 1.3532744646072388,
|
|
"step": 3719
|
|
},
|
|
{
|
|
"epoch": 1.9537815126050422,
|
|
"grad_norm": 10.980794342686636,
|
|
"learning_rate": 3.2729991383675952e-06,
|
|
"loss": 1.6606961488723755,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 1.9543067226890756,
|
|
"grad_norm": 11.617088574682121,
|
|
"learning_rate": 3.2701315186183692e-06,
|
|
"loss": 1.364490270614624,
|
|
"step": 3721
|
|
},
|
|
{
|
|
"epoch": 1.9548319327731094,
|
|
"grad_norm": 7.731508149813748,
|
|
"learning_rate": 3.2672645450983465e-06,
|
|
"loss": 0.9687061309814453,
|
|
"step": 3722
|
|
},
|
|
{
|
|
"epoch": 1.9553571428571428,
|
|
"grad_norm": 14.167412966780217,
|
|
"learning_rate": 3.2643982188785457e-06,
|
|
"loss": 1.4106699228286743,
|
|
"step": 3723
|
|
},
|
|
{
|
|
"epoch": 1.9558823529411766,
|
|
"grad_norm": 10.325230666905087,
|
|
"learning_rate": 3.261532541029744e-06,
|
|
"loss": 1.0961787700653076,
|
|
"step": 3724
|
|
},
|
|
{
|
|
"epoch": 1.95640756302521,
|
|
"grad_norm": 12.380657626003757,
|
|
"learning_rate": 3.258667512622475e-06,
|
|
"loss": 2.1758525371551514,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 1.9569327731092439,
|
|
"grad_norm": 12.375978678030158,
|
|
"learning_rate": 3.2558031347270337e-06,
|
|
"loss": 1.6618202924728394,
|
|
"step": 3726
|
|
},
|
|
{
|
|
"epoch": 1.9574579831932772,
|
|
"grad_norm": 18.36931525171859,
|
|
"learning_rate": 3.252939408413467e-06,
|
|
"loss": 1.6743544340133667,
|
|
"step": 3727
|
|
},
|
|
{
|
|
"epoch": 1.957983193277311,
|
|
"grad_norm": 13.844652681081724,
|
|
"learning_rate": 3.250076334751583e-06,
|
|
"loss": 1.5532047748565674,
|
|
"step": 3728
|
|
},
|
|
{
|
|
"epoch": 1.9585084033613445,
|
|
"grad_norm": 11.717970592519013,
|
|
"learning_rate": 3.2472139148109416e-06,
|
|
"loss": 1.7142189741134644,
|
|
"step": 3729
|
|
},
|
|
{
|
|
"epoch": 1.9590336134453783,
|
|
"grad_norm": 13.045361171349294,
|
|
"learning_rate": 3.244352149660862e-06,
|
|
"loss": 1.671049952507019,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 1.9595588235294117,
|
|
"grad_norm": 8.762201229168596,
|
|
"learning_rate": 3.241491040370418e-06,
|
|
"loss": 1.460729956626892,
|
|
"step": 3731
|
|
},
|
|
{
|
|
"epoch": 1.9600840336134455,
|
|
"grad_norm": 11.680399571413231,
|
|
"learning_rate": 3.2386305880084362e-06,
|
|
"loss": 1.543219804763794,
|
|
"step": 3732
|
|
},
|
|
{
|
|
"epoch": 1.960609243697479,
|
|
"grad_norm": 14.670911846953633,
|
|
"learning_rate": 3.2357707936435013e-06,
|
|
"loss": 2.0265159606933594,
|
|
"step": 3733
|
|
},
|
|
{
|
|
"epoch": 1.9611344537815127,
|
|
"grad_norm": 18.386961995691504,
|
|
"learning_rate": 3.23291165834395e-06,
|
|
"loss": 1.4027315378189087,
|
|
"step": 3734
|
|
},
|
|
{
|
|
"epoch": 1.9616596638655461,
|
|
"grad_norm": 10.452503460247428,
|
|
"learning_rate": 3.2300531831778726e-06,
|
|
"loss": 1.4007822275161743,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 1.96218487394958,
|
|
"grad_norm": 11.202651971830901,
|
|
"learning_rate": 3.227195369213112e-06,
|
|
"loss": 1.1638128757476807,
|
|
"step": 3736
|
|
},
|
|
{
|
|
"epoch": 1.9627100840336134,
|
|
"grad_norm": 9.390953349282208,
|
|
"learning_rate": 3.224338217517269e-06,
|
|
"loss": 1.9747158288955688,
|
|
"step": 3737
|
|
},
|
|
{
|
|
"epoch": 1.9632352941176472,
|
|
"grad_norm": 11.709132535983134,
|
|
"learning_rate": 3.2214817291576905e-06,
|
|
"loss": 1.9067872762680054,
|
|
"step": 3738
|
|
},
|
|
{
|
|
"epoch": 1.9637605042016806,
|
|
"grad_norm": 8.790615053164244,
|
|
"learning_rate": 3.2186259052014797e-06,
|
|
"loss": 1.751824140548706,
|
|
"step": 3739
|
|
},
|
|
{
|
|
"epoch": 1.9642857142857144,
|
|
"grad_norm": 9.402188962100931,
|
|
"learning_rate": 3.2157707467154893e-06,
|
|
"loss": 1.681583046913147,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 1.9648109243697478,
|
|
"grad_norm": 11.439637967018207,
|
|
"learning_rate": 3.212916254766326e-06,
|
|
"loss": 2.8366539478302,
|
|
"step": 3741
|
|
},
|
|
{
|
|
"epoch": 1.9653361344537816,
|
|
"grad_norm": 13.600069767601083,
|
|
"learning_rate": 3.2100624304203463e-06,
|
|
"loss": 1.9346435070037842,
|
|
"step": 3742
|
|
},
|
|
{
|
|
"epoch": 1.965861344537815,
|
|
"grad_norm": 17.194659560760936,
|
|
"learning_rate": 3.2072092747436546e-06,
|
|
"loss": 1.8379788398742676,
|
|
"step": 3743
|
|
},
|
|
{
|
|
"epoch": 1.9663865546218489,
|
|
"grad_norm": 10.341331457245321,
|
|
"learning_rate": 3.204356788802111e-06,
|
|
"loss": 1.3968738317489624,
|
|
"step": 3744
|
|
},
|
|
{
|
|
"epoch": 1.9669117647058822,
|
|
"grad_norm": 15.955853952873733,
|
|
"learning_rate": 3.20150497366132e-06,
|
|
"loss": 1.8972653150558472,
|
|
"step": 3745
|
|
},
|
|
{
|
|
"epoch": 1.967436974789916,
|
|
"grad_norm": 9.289206563836805,
|
|
"learning_rate": 3.19865383038664e-06,
|
|
"loss": 2.190624713897705,
|
|
"step": 3746
|
|
},
|
|
{
|
|
"epoch": 1.9679621848739495,
|
|
"grad_norm": 12.037389036541798,
|
|
"learning_rate": 3.1958033600431736e-06,
|
|
"loss": 1.724454402923584,
|
|
"step": 3747
|
|
},
|
|
{
|
|
"epoch": 1.9684873949579833,
|
|
"grad_norm": 8.235554202708748,
|
|
"learning_rate": 3.1929535636957774e-06,
|
|
"loss": 1.6073118448257446,
|
|
"step": 3748
|
|
},
|
|
{
|
|
"epoch": 1.9690126050420167,
|
|
"grad_norm": 11.501803657406654,
|
|
"learning_rate": 3.190104442409052e-06,
|
|
"loss": 1.774906039237976,
|
|
"step": 3749
|
|
},
|
|
{
|
|
"epoch": 1.9695378151260505,
|
|
"grad_norm": 11.422652668633384,
|
|
"learning_rate": 3.1872559972473475e-06,
|
|
"loss": 1.8382604122161865,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 1.970063025210084,
|
|
"grad_norm": 24.28132793523804,
|
|
"learning_rate": 3.18440822927476e-06,
|
|
"loss": 1.1020464897155762,
|
|
"step": 3751
|
|
},
|
|
{
|
|
"epoch": 1.9705882352941178,
|
|
"grad_norm": 7.63934910351465,
|
|
"learning_rate": 3.1815611395551373e-06,
|
|
"loss": 1.5953342914581299,
|
|
"step": 3752
|
|
},
|
|
{
|
|
"epoch": 1.9711134453781511,
|
|
"grad_norm": 8.920141330418113,
|
|
"learning_rate": 3.1787147291520675e-06,
|
|
"loss": 1.7396833896636963,
|
|
"step": 3753
|
|
},
|
|
{
|
|
"epoch": 1.971638655462185,
|
|
"grad_norm": 13.786573748934304,
|
|
"learning_rate": 3.1758689991288886e-06,
|
|
"loss": 1.9062604904174805,
|
|
"step": 3754
|
|
},
|
|
{
|
|
"epoch": 1.9721638655462184,
|
|
"grad_norm": 9.332335856936437,
|
|
"learning_rate": 3.1730239505486827e-06,
|
|
"loss": 1.7307486534118652,
|
|
"step": 3755
|
|
},
|
|
{
|
|
"epoch": 1.9726890756302522,
|
|
"grad_norm": 9.633958070288003,
|
|
"learning_rate": 3.1701795844742806e-06,
|
|
"loss": 1.846346378326416,
|
|
"step": 3756
|
|
},
|
|
{
|
|
"epoch": 1.9732142857142856,
|
|
"grad_norm": 9.441284749946968,
|
|
"learning_rate": 3.1673359019682538e-06,
|
|
"loss": 1.3852074146270752,
|
|
"step": 3757
|
|
},
|
|
{
|
|
"epoch": 1.9737394957983194,
|
|
"grad_norm": 17.471973480024587,
|
|
"learning_rate": 3.164492904092921e-06,
|
|
"loss": 1.958652377128601,
|
|
"step": 3758
|
|
},
|
|
{
|
|
"epoch": 1.9742647058823528,
|
|
"grad_norm": 16.607085375072238,
|
|
"learning_rate": 3.1616505919103446e-06,
|
|
"loss": 1.3102972507476807,
|
|
"step": 3759
|
|
},
|
|
{
|
|
"epoch": 1.9747899159663866,
|
|
"grad_norm": 8.699856897784624,
|
|
"learning_rate": 3.158808966482331e-06,
|
|
"loss": 1.1039776802062988,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 1.97531512605042,
|
|
"grad_norm": 8.566075459963004,
|
|
"learning_rate": 3.1559680288704297e-06,
|
|
"loss": 1.9869599342346191,
|
|
"step": 3761
|
|
},
|
|
{
|
|
"epoch": 1.9758403361344539,
|
|
"grad_norm": 10.829440072378784,
|
|
"learning_rate": 3.1531277801359326e-06,
|
|
"loss": 1.6302375793457031,
|
|
"step": 3762
|
|
},
|
|
{
|
|
"epoch": 1.9763655462184873,
|
|
"grad_norm": 7.122397427214688,
|
|
"learning_rate": 3.1502882213398776e-06,
|
|
"loss": 0.7667158842086792,
|
|
"step": 3763
|
|
},
|
|
{
|
|
"epoch": 1.976890756302521,
|
|
"grad_norm": 11.39418212477519,
|
|
"learning_rate": 3.1474493535430408e-06,
|
|
"loss": 1.8233389854431152,
|
|
"step": 3764
|
|
},
|
|
{
|
|
"epoch": 1.9774159663865545,
|
|
"grad_norm": 11.53953144938256,
|
|
"learning_rate": 3.1446111778059405e-06,
|
|
"loss": 1.3942296504974365,
|
|
"step": 3765
|
|
},
|
|
{
|
|
"epoch": 1.9779411764705883,
|
|
"grad_norm": 11.918470999286875,
|
|
"learning_rate": 3.1417736951888385e-06,
|
|
"loss": 1.4948136806488037,
|
|
"step": 3766
|
|
},
|
|
{
|
|
"epoch": 1.9784663865546217,
|
|
"grad_norm": 13.96097017851373,
|
|
"learning_rate": 3.1389369067517383e-06,
|
|
"loss": 1.5201667547225952,
|
|
"step": 3767
|
|
},
|
|
{
|
|
"epoch": 1.9789915966386555,
|
|
"grad_norm": 20.99552237339867,
|
|
"learning_rate": 3.136100813554381e-06,
|
|
"loss": 2.2616405487060547,
|
|
"step": 3768
|
|
},
|
|
{
|
|
"epoch": 1.979516806722689,
|
|
"grad_norm": 14.868637286663587,
|
|
"learning_rate": 3.1332654166562494e-06,
|
|
"loss": 1.4352052211761475,
|
|
"step": 3769
|
|
},
|
|
{
|
|
"epoch": 1.9800420168067228,
|
|
"grad_norm": 7.7878468995648324,
|
|
"learning_rate": 3.1304307171165675e-06,
|
|
"loss": 1.4993510246276855,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 1.9805672268907561,
|
|
"grad_norm": 9.912002918695766,
|
|
"learning_rate": 3.1275967159942976e-06,
|
|
"loss": 1.2762047052383423,
|
|
"step": 3771
|
|
},
|
|
{
|
|
"epoch": 1.98109243697479,
|
|
"grad_norm": 10.98363667889731,
|
|
"learning_rate": 3.124763414348141e-06,
|
|
"loss": 1.6206231117248535,
|
|
"step": 3772
|
|
},
|
|
{
|
|
"epoch": 1.9816176470588234,
|
|
"grad_norm": 13.468199718917681,
|
|
"learning_rate": 3.1219308132365365e-06,
|
|
"loss": 1.6075819730758667,
|
|
"step": 3773
|
|
},
|
|
{
|
|
"epoch": 1.9821428571428572,
|
|
"grad_norm": 12.48978364742403,
|
|
"learning_rate": 3.1190989137176653e-06,
|
|
"loss": 1.6008939743041992,
|
|
"step": 3774
|
|
},
|
|
{
|
|
"epoch": 1.9826680672268906,
|
|
"grad_norm": 8.891890225396363,
|
|
"learning_rate": 3.116267716849441e-06,
|
|
"loss": 1.1342711448669434,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 1.9831932773109244,
|
|
"grad_norm": 7.782369396395457,
|
|
"learning_rate": 3.1134372236895193e-06,
|
|
"loss": 1.7301146984100342,
|
|
"step": 3776
|
|
},
|
|
{
|
|
"epoch": 1.9837184873949578,
|
|
"grad_norm": 14.291541152153517,
|
|
"learning_rate": 3.110607435295289e-06,
|
|
"loss": 1.9669077396392822,
|
|
"step": 3777
|
|
},
|
|
{
|
|
"epoch": 1.9842436974789917,
|
|
"grad_norm": 11.545226440558016,
|
|
"learning_rate": 3.1077783527238807e-06,
|
|
"loss": 1.5780227184295654,
|
|
"step": 3778
|
|
},
|
|
{
|
|
"epoch": 1.9847689075630253,
|
|
"grad_norm": 10.03545786660131,
|
|
"learning_rate": 3.1049499770321572e-06,
|
|
"loss": 1.5493851900100708,
|
|
"step": 3779
|
|
},
|
|
{
|
|
"epoch": 1.9852941176470589,
|
|
"grad_norm": 9.881074989678645,
|
|
"learning_rate": 3.102122309276717e-06,
|
|
"loss": 1.2470885515213013,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 1.9858193277310925,
|
|
"grad_norm": 10.085111739490527,
|
|
"learning_rate": 3.099295350513898e-06,
|
|
"loss": 1.616032361984253,
|
|
"step": 3781
|
|
},
|
|
{
|
|
"epoch": 1.986344537815126,
|
|
"grad_norm": 8.1920549728376,
|
|
"learning_rate": 3.09646910179977e-06,
|
|
"loss": 1.7792916297912598,
|
|
"step": 3782
|
|
},
|
|
{
|
|
"epoch": 1.9868697478991597,
|
|
"grad_norm": 10.285570410870244,
|
|
"learning_rate": 3.093643564190138e-06,
|
|
"loss": 2.3514113426208496,
|
|
"step": 3783
|
|
},
|
|
{
|
|
"epoch": 1.9873949579831933,
|
|
"grad_norm": 11.833868373130606,
|
|
"learning_rate": 3.0908187387405407e-06,
|
|
"loss": 1.5242578983306885,
|
|
"step": 3784
|
|
},
|
|
{
|
|
"epoch": 1.987920168067227,
|
|
"grad_norm": 7.152017643377308,
|
|
"learning_rate": 3.087994626506254e-06,
|
|
"loss": 1.1745996475219727,
|
|
"step": 3785
|
|
},
|
|
{
|
|
"epoch": 1.9884453781512605,
|
|
"grad_norm": 7.9758913611007864,
|
|
"learning_rate": 3.085171228542284e-06,
|
|
"loss": 1.1497207880020142,
|
|
"step": 3786
|
|
},
|
|
{
|
|
"epoch": 1.9889705882352942,
|
|
"grad_norm": 8.788039493597397,
|
|
"learning_rate": 3.0823485459033707e-06,
|
|
"loss": 1.6483783721923828,
|
|
"step": 3787
|
|
},
|
|
{
|
|
"epoch": 1.9894957983193278,
|
|
"grad_norm": 23.501388834835307,
|
|
"learning_rate": 3.0795265796439876e-06,
|
|
"loss": 2.2067291736602783,
|
|
"step": 3788
|
|
},
|
|
{
|
|
"epoch": 1.9900210084033614,
|
|
"grad_norm": 17.106029301572583,
|
|
"learning_rate": 3.0767053308183416e-06,
|
|
"loss": 1.5530177354812622,
|
|
"step": 3789
|
|
},
|
|
{
|
|
"epoch": 1.990546218487395,
|
|
"grad_norm": 15.098012723446429,
|
|
"learning_rate": 3.073884800480369e-06,
|
|
"loss": 1.292409896850586,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 1.9910714285714286,
|
|
"grad_norm": 16.860824491413464,
|
|
"learning_rate": 3.0710649896837386e-06,
|
|
"loss": 1.5788860321044922,
|
|
"step": 3791
|
|
},
|
|
{
|
|
"epoch": 1.9915966386554622,
|
|
"grad_norm": 14.040446919933688,
|
|
"learning_rate": 3.068245899481851e-06,
|
|
"loss": 1.4469319581985474,
|
|
"step": 3792
|
|
},
|
|
{
|
|
"epoch": 1.9921218487394958,
|
|
"grad_norm": 14.268241574021172,
|
|
"learning_rate": 3.0654275309278382e-06,
|
|
"loss": 1.6369696855545044,
|
|
"step": 3793
|
|
},
|
|
{
|
|
"epoch": 1.9926470588235294,
|
|
"grad_norm": 10.859225606953428,
|
|
"learning_rate": 3.0626098850745616e-06,
|
|
"loss": 1.3132859468460083,
|
|
"step": 3794
|
|
},
|
|
{
|
|
"epoch": 1.993172268907563,
|
|
"grad_norm": 12.100034108218185,
|
|
"learning_rate": 3.0597929629746114e-06,
|
|
"loss": 1.1590535640716553,
|
|
"step": 3795
|
|
},
|
|
{
|
|
"epoch": 1.9936974789915967,
|
|
"grad_norm": 9.661986359853291,
|
|
"learning_rate": 3.0569767656803106e-06,
|
|
"loss": 1.2120416164398193,
|
|
"step": 3796
|
|
},
|
|
{
|
|
"epoch": 1.9942226890756303,
|
|
"grad_norm": 20.096411091337497,
|
|
"learning_rate": 3.0541612942437095e-06,
|
|
"loss": 1.4346848726272583,
|
|
"step": 3797
|
|
},
|
|
{
|
|
"epoch": 1.9947478991596639,
|
|
"grad_norm": 9.594464947917247,
|
|
"learning_rate": 3.051346549716586e-06,
|
|
"loss": 0.8941363096237183,
|
|
"step": 3798
|
|
},
|
|
{
|
|
"epoch": 1.9952731092436975,
|
|
"grad_norm": 11.038374544376225,
|
|
"learning_rate": 3.0485325331504477e-06,
|
|
"loss": 1.2042505741119385,
|
|
"step": 3799
|
|
},
|
|
{
|
|
"epoch": 1.995798319327731,
|
|
"grad_norm": 9.0076016644004,
|
|
"learning_rate": 3.045719245596533e-06,
|
|
"loss": 1.5936388969421387,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 1.9963235294117647,
|
|
"grad_norm": 11.03543858156108,
|
|
"learning_rate": 3.0429066881058036e-06,
|
|
"loss": 0.7732980251312256,
|
|
"step": 3801
|
|
},
|
|
{
|
|
"epoch": 1.9968487394957983,
|
|
"grad_norm": 12.590583200344826,
|
|
"learning_rate": 3.04009486172895e-06,
|
|
"loss": 1.8531079292297363,
|
|
"step": 3802
|
|
},
|
|
{
|
|
"epoch": 1.997373949579832,
|
|
"grad_norm": 11.735574804351145,
|
|
"learning_rate": 3.037283767516389e-06,
|
|
"loss": 1.287428855895996,
|
|
"step": 3803
|
|
},
|
|
{
|
|
"epoch": 1.9978991596638656,
|
|
"grad_norm": 10.496275378962904,
|
|
"learning_rate": 3.034473406518268e-06,
|
|
"loss": 1.0967342853546143,
|
|
"step": 3804
|
|
},
|
|
{
|
|
"epoch": 1.9984243697478992,
|
|
"grad_norm": 9.232693829502468,
|
|
"learning_rate": 3.031663779784454e-06,
|
|
"loss": 1.1606006622314453,
|
|
"step": 3805
|
|
},
|
|
{
|
|
"epoch": 1.9989495798319328,
|
|
"grad_norm": 18.46446368760907,
|
|
"learning_rate": 3.028854888364543e-06,
|
|
"loss": 1.6747987270355225,
|
|
"step": 3806
|
|
},
|
|
{
|
|
"epoch": 1.9994747899159664,
|
|
"grad_norm": 8.51216303954961,
|
|
"learning_rate": 3.026046733307859e-06,
|
|
"loss": 1.3044161796569824,
|
|
"step": 3807
|
|
},
|
|
{
|
|
"epoch": 2.0,
|
|
"grad_norm": 11.305158060912134,
|
|
"learning_rate": 3.0232393156634455e-06,
|
|
"loss": 1.2226500511169434,
|
|
"step": 3808
|
|
},
|
|
{
|
|
"epoch": 2.000525210084034,
|
|
"grad_norm": 11.017549290867258,
|
|
"learning_rate": 3.020432636480074e-06,
|
|
"loss": 0.3734968304634094,
|
|
"step": 3809
|
|
},
|
|
{
|
|
"epoch": 2.0010504201680672,
|
|
"grad_norm": 12.858661865707514,
|
|
"learning_rate": 3.0176266968062386e-06,
|
|
"loss": 0.8799101710319519,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 2.001575630252101,
|
|
"grad_norm": 11.04993858918612,
|
|
"learning_rate": 3.014821497690159e-06,
|
|
"loss": 0.7394332885742188,
|
|
"step": 3811
|
|
},
|
|
{
|
|
"epoch": 2.0021008403361344,
|
|
"grad_norm": 11.846825155028316,
|
|
"learning_rate": 3.0120170401797765e-06,
|
|
"loss": 0.5659482479095459,
|
|
"step": 3812
|
|
},
|
|
{
|
|
"epoch": 2.0026260504201683,
|
|
"grad_norm": 14.090147360190999,
|
|
"learning_rate": 3.0092133253227563e-06,
|
|
"loss": 0.8160654306411743,
|
|
"step": 3813
|
|
},
|
|
{
|
|
"epoch": 2.0031512605042017,
|
|
"grad_norm": 11.588286715579699,
|
|
"learning_rate": 3.0064103541664837e-06,
|
|
"loss": 0.4015316367149353,
|
|
"step": 3814
|
|
},
|
|
{
|
|
"epoch": 2.0036764705882355,
|
|
"grad_norm": 12.505131624805335,
|
|
"learning_rate": 3.0036081277580715e-06,
|
|
"loss": 0.685259222984314,
|
|
"step": 3815
|
|
},
|
|
{
|
|
"epoch": 2.004201680672269,
|
|
"grad_norm": 8.20036099369699,
|
|
"learning_rate": 3.0008066471443486e-06,
|
|
"loss": 0.28031444549560547,
|
|
"step": 3816
|
|
},
|
|
{
|
|
"epoch": 2.0047268907563027,
|
|
"grad_norm": 13.69636019869882,
|
|
"learning_rate": 2.9980059133718687e-06,
|
|
"loss": 0.49320584535598755,
|
|
"step": 3817
|
|
},
|
|
{
|
|
"epoch": 2.005252100840336,
|
|
"grad_norm": 7.201972156017342,
|
|
"learning_rate": 2.995205927486905e-06,
|
|
"loss": 0.5989927649497986,
|
|
"step": 3818
|
|
},
|
|
{
|
|
"epoch": 2.00577731092437,
|
|
"grad_norm": 9.1127502561537,
|
|
"learning_rate": 2.992406690535453e-06,
|
|
"loss": 0.3774471879005432,
|
|
"step": 3819
|
|
},
|
|
{
|
|
"epoch": 2.0063025210084033,
|
|
"grad_norm": 8.926901854429863,
|
|
"learning_rate": 2.989608203563227e-06,
|
|
"loss": 0.6756539344787598,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 2.006827731092437,
|
|
"grad_norm": 6.096267081837672,
|
|
"learning_rate": 2.986810467615659e-06,
|
|
"loss": 0.23616181313991547,
|
|
"step": 3821
|
|
},
|
|
{
|
|
"epoch": 2.0073529411764706,
|
|
"grad_norm": 6.715899318757637,
|
|
"learning_rate": 2.984013483737906e-06,
|
|
"loss": 0.5607936382293701,
|
|
"step": 3822
|
|
},
|
|
{
|
|
"epoch": 2.0078781512605044,
|
|
"grad_norm": 10.095202426849127,
|
|
"learning_rate": 2.9812172529748395e-06,
|
|
"loss": 0.6747167706489563,
|
|
"step": 3823
|
|
},
|
|
{
|
|
"epoch": 2.008403361344538,
|
|
"grad_norm": 6.2149130542882585,
|
|
"learning_rate": 2.978421776371051e-06,
|
|
"loss": 0.14100810885429382,
|
|
"step": 3824
|
|
},
|
|
{
|
|
"epoch": 2.0089285714285716,
|
|
"grad_norm": 8.725547893562963,
|
|
"learning_rate": 2.9756270549708497e-06,
|
|
"loss": 0.9651519656181335,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 2.009453781512605,
|
|
"grad_norm": 13.252615132261226,
|
|
"learning_rate": 2.9728330898182654e-06,
|
|
"loss": 0.5731452107429504,
|
|
"step": 3826
|
|
},
|
|
{
|
|
"epoch": 2.009978991596639,
|
|
"grad_norm": 14.293661270700504,
|
|
"learning_rate": 2.9700398819570414e-06,
|
|
"loss": 1.022626280784607,
|
|
"step": 3827
|
|
},
|
|
{
|
|
"epoch": 2.0105042016806722,
|
|
"grad_norm": 17.175073827609122,
|
|
"learning_rate": 2.9672474324306407e-06,
|
|
"loss": 0.6618038415908813,
|
|
"step": 3828
|
|
},
|
|
{
|
|
"epoch": 2.011029411764706,
|
|
"grad_norm": 10.76623365226854,
|
|
"learning_rate": 2.9644557422822406e-06,
|
|
"loss": 0.47713351249694824,
|
|
"step": 3829
|
|
},
|
|
{
|
|
"epoch": 2.0115546218487395,
|
|
"grad_norm": 18.209384971359377,
|
|
"learning_rate": 2.9616648125547395e-06,
|
|
"loss": 0.7910279035568237,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 2.0120798319327733,
|
|
"grad_norm": 9.744248855130357,
|
|
"learning_rate": 2.9588746442907467e-06,
|
|
"loss": 0.2927781045436859,
|
|
"step": 3831
|
|
},
|
|
{
|
|
"epoch": 2.0126050420168067,
|
|
"grad_norm": 9.067333591111359,
|
|
"learning_rate": 2.9560852385325876e-06,
|
|
"loss": 0.3623805642127991,
|
|
"step": 3832
|
|
},
|
|
{
|
|
"epoch": 2.0131302521008405,
|
|
"grad_norm": 13.754858955605707,
|
|
"learning_rate": 2.9532965963223076e-06,
|
|
"loss": 0.8041249513626099,
|
|
"step": 3833
|
|
},
|
|
{
|
|
"epoch": 2.013655462184874,
|
|
"grad_norm": 14.329204658941654,
|
|
"learning_rate": 2.9505087187016614e-06,
|
|
"loss": 0.711012065410614,
|
|
"step": 3834
|
|
},
|
|
{
|
|
"epoch": 2.0141806722689077,
|
|
"grad_norm": 10.33204702661545,
|
|
"learning_rate": 2.9477216067121196e-06,
|
|
"loss": 0.4064977467060089,
|
|
"step": 3835
|
|
},
|
|
{
|
|
"epoch": 2.014705882352941,
|
|
"grad_norm": 13.262578282956373,
|
|
"learning_rate": 2.9449352613948667e-06,
|
|
"loss": 0.38866499066352844,
|
|
"step": 3836
|
|
},
|
|
{
|
|
"epoch": 2.015231092436975,
|
|
"grad_norm": 8.693634074278547,
|
|
"learning_rate": 2.9421496837908036e-06,
|
|
"loss": 0.48318976163864136,
|
|
"step": 3837
|
|
},
|
|
{
|
|
"epoch": 2.0157563025210083,
|
|
"grad_norm": 10.649148028003987,
|
|
"learning_rate": 2.939364874940541e-06,
|
|
"loss": 1.2861061096191406,
|
|
"step": 3838
|
|
},
|
|
{
|
|
"epoch": 2.016281512605042,
|
|
"grad_norm": 11.18722937697221,
|
|
"learning_rate": 2.936580835884404e-06,
|
|
"loss": 0.2546107769012451,
|
|
"step": 3839
|
|
},
|
|
{
|
|
"epoch": 2.0168067226890756,
|
|
"grad_norm": 13.605156504134753,
|
|
"learning_rate": 2.9337975676624276e-06,
|
|
"loss": 0.42386680841445923,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 2.0173319327731094,
|
|
"grad_norm": 11.149963058265799,
|
|
"learning_rate": 2.9310150713143637e-06,
|
|
"loss": 0.5541623830795288,
|
|
"step": 3841
|
|
},
|
|
{
|
|
"epoch": 2.017857142857143,
|
|
"grad_norm": 10.713457863765639,
|
|
"learning_rate": 2.928233347879671e-06,
|
|
"loss": 0.7140313982963562,
|
|
"step": 3842
|
|
},
|
|
{
|
|
"epoch": 2.0183823529411766,
|
|
"grad_norm": 10.1203504368144,
|
|
"learning_rate": 2.9254523983975224e-06,
|
|
"loss": 0.6277381777763367,
|
|
"step": 3843
|
|
},
|
|
{
|
|
"epoch": 2.01890756302521,
|
|
"grad_norm": 12.302024743997311,
|
|
"learning_rate": 2.9226722239068006e-06,
|
|
"loss": 0.5380294322967529,
|
|
"step": 3844
|
|
},
|
|
{
|
|
"epoch": 2.019432773109244,
|
|
"grad_norm": 13.075573617347679,
|
|
"learning_rate": 2.9198928254461e-06,
|
|
"loss": 0.42165398597717285,
|
|
"step": 3845
|
|
},
|
|
{
|
|
"epoch": 2.0199579831932772,
|
|
"grad_norm": 10.209053773592924,
|
|
"learning_rate": 2.9171142040537225e-06,
|
|
"loss": 0.454216867685318,
|
|
"step": 3846
|
|
},
|
|
{
|
|
"epoch": 2.020483193277311,
|
|
"grad_norm": 9.385643099394109,
|
|
"learning_rate": 2.9143363607676824e-06,
|
|
"loss": 0.4512168765068054,
|
|
"step": 3847
|
|
},
|
|
{
|
|
"epoch": 2.0210084033613445,
|
|
"grad_norm": 16.04554905978146,
|
|
"learning_rate": 2.911559296625701e-06,
|
|
"loss": 0.5049360990524292,
|
|
"step": 3848
|
|
},
|
|
{
|
|
"epoch": 2.0215336134453783,
|
|
"grad_norm": 9.503456802547229,
|
|
"learning_rate": 2.908783012665209e-06,
|
|
"loss": 0.7787807583808899,
|
|
"step": 3849
|
|
},
|
|
{
|
|
"epoch": 2.0220588235294117,
|
|
"grad_norm": 8.08266768016149,
|
|
"learning_rate": 2.9060075099233496e-06,
|
|
"loss": 0.6721572279930115,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 2.0225840336134455,
|
|
"grad_norm": 11.000241803723165,
|
|
"learning_rate": 2.9032327894369693e-06,
|
|
"loss": 0.45701268315315247,
|
|
"step": 3851
|
|
},
|
|
{
|
|
"epoch": 2.023109243697479,
|
|
"grad_norm": 7.913564797158489,
|
|
"learning_rate": 2.9004588522426235e-06,
|
|
"loss": 0.3375888168811798,
|
|
"step": 3852
|
|
},
|
|
{
|
|
"epoch": 2.0236344537815127,
|
|
"grad_norm": 14.779120786899409,
|
|
"learning_rate": 2.8976856993765766e-06,
|
|
"loss": 0.350841224193573,
|
|
"step": 3853
|
|
},
|
|
{
|
|
"epoch": 2.024159663865546,
|
|
"grad_norm": 12.045942359753681,
|
|
"learning_rate": 2.894913331874798e-06,
|
|
"loss": 0.714718222618103,
|
|
"step": 3854
|
|
},
|
|
{
|
|
"epoch": 2.02468487394958,
|
|
"grad_norm": 6.860206953796827,
|
|
"learning_rate": 2.8921417507729644e-06,
|
|
"loss": 0.5312122106552124,
|
|
"step": 3855
|
|
},
|
|
{
|
|
"epoch": 2.0252100840336134,
|
|
"grad_norm": 18.674079867961407,
|
|
"learning_rate": 2.8893709571064575e-06,
|
|
"loss": 0.7049558162689209,
|
|
"step": 3856
|
|
},
|
|
{
|
|
"epoch": 2.025735294117647,
|
|
"grad_norm": 9.550567764233456,
|
|
"learning_rate": 2.8866009519103705e-06,
|
|
"loss": 0.29532164335250854,
|
|
"step": 3857
|
|
},
|
|
{
|
|
"epoch": 2.0262605042016806,
|
|
"grad_norm": 9.16837068693569,
|
|
"learning_rate": 2.883831736219495e-06,
|
|
"loss": 0.35209500789642334,
|
|
"step": 3858
|
|
},
|
|
{
|
|
"epoch": 2.0267857142857144,
|
|
"grad_norm": 13.896448359397509,
|
|
"learning_rate": 2.8810633110683312e-06,
|
|
"loss": 0.6380374431610107,
|
|
"step": 3859
|
|
},
|
|
{
|
|
"epoch": 2.027310924369748,
|
|
"grad_norm": 11.764312168328866,
|
|
"learning_rate": 2.878295677491083e-06,
|
|
"loss": 0.6972619891166687,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 2.0278361344537816,
|
|
"grad_norm": 12.943753016303793,
|
|
"learning_rate": 2.875528836521658e-06,
|
|
"loss": 0.44367527961730957,
|
|
"step": 3861
|
|
},
|
|
{
|
|
"epoch": 2.028361344537815,
|
|
"grad_norm": 11.529936458877117,
|
|
"learning_rate": 2.8727627891936705e-06,
|
|
"loss": 0.9527031779289246,
|
|
"step": 3862
|
|
},
|
|
{
|
|
"epoch": 2.028886554621849,
|
|
"grad_norm": 11.630298224019956,
|
|
"learning_rate": 2.869997536540435e-06,
|
|
"loss": 0.5449413061141968,
|
|
"step": 3863
|
|
},
|
|
{
|
|
"epoch": 2.0294117647058822,
|
|
"grad_norm": 9.075541387998747,
|
|
"learning_rate": 2.8672330795949683e-06,
|
|
"loss": 0.3742218017578125,
|
|
"step": 3864
|
|
},
|
|
{
|
|
"epoch": 2.029936974789916,
|
|
"grad_norm": 15.437416550452978,
|
|
"learning_rate": 2.864469419389997e-06,
|
|
"loss": 1.433685064315796,
|
|
"step": 3865
|
|
},
|
|
{
|
|
"epoch": 2.0304621848739495,
|
|
"grad_norm": 18.644197410513236,
|
|
"learning_rate": 2.8617065569579406e-06,
|
|
"loss": 1.8893550634384155,
|
|
"step": 3866
|
|
},
|
|
{
|
|
"epoch": 2.0309873949579833,
|
|
"grad_norm": 8.28267567388336,
|
|
"learning_rate": 2.858944493330929e-06,
|
|
"loss": 0.27451810240745544,
|
|
"step": 3867
|
|
},
|
|
{
|
|
"epoch": 2.0315126050420167,
|
|
"grad_norm": 8.978545368849838,
|
|
"learning_rate": 2.856183229540789e-06,
|
|
"loss": 0.4551701843738556,
|
|
"step": 3868
|
|
},
|
|
{
|
|
"epoch": 2.0320378151260505,
|
|
"grad_norm": 12.2975837103672,
|
|
"learning_rate": 2.8534227666190484e-06,
|
|
"loss": 0.8021912574768066,
|
|
"step": 3869
|
|
},
|
|
{
|
|
"epoch": 2.032563025210084,
|
|
"grad_norm": 7.900657783165174,
|
|
"learning_rate": 2.850663105596937e-06,
|
|
"loss": 0.3059179186820984,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 2.0330882352941178,
|
|
"grad_norm": 10.982164455366245,
|
|
"learning_rate": 2.8479042475053853e-06,
|
|
"loss": 0.4565742015838623,
|
|
"step": 3871
|
|
},
|
|
{
|
|
"epoch": 2.033613445378151,
|
|
"grad_norm": 9.12506877974996,
|
|
"learning_rate": 2.8451461933750237e-06,
|
|
"loss": 0.477339506149292,
|
|
"step": 3872
|
|
},
|
|
{
|
|
"epoch": 2.034138655462185,
|
|
"grad_norm": 11.485943345713027,
|
|
"learning_rate": 2.8423889442361797e-06,
|
|
"loss": 0.8305515050888062,
|
|
"step": 3873
|
|
},
|
|
{
|
|
"epoch": 2.0346638655462184,
|
|
"grad_norm": 10.011597215714765,
|
|
"learning_rate": 2.8396325011188865e-06,
|
|
"loss": 0.39306312799453735,
|
|
"step": 3874
|
|
},
|
|
{
|
|
"epoch": 2.035189075630252,
|
|
"grad_norm": 12.329547083437253,
|
|
"learning_rate": 2.83687686505287e-06,
|
|
"loss": 0.4426557719707489,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 2.0357142857142856,
|
|
"grad_norm": 10.078565663032956,
|
|
"learning_rate": 2.834122037067556e-06,
|
|
"loss": 0.3150191307067871,
|
|
"step": 3876
|
|
},
|
|
{
|
|
"epoch": 2.0362394957983194,
|
|
"grad_norm": 13.17037838338679,
|
|
"learning_rate": 2.831368018192071e-06,
|
|
"loss": 0.9622311592102051,
|
|
"step": 3877
|
|
},
|
|
{
|
|
"epoch": 2.036764705882353,
|
|
"grad_norm": 12.099593531796275,
|
|
"learning_rate": 2.828614809455236e-06,
|
|
"loss": 0.26061639189720154,
|
|
"step": 3878
|
|
},
|
|
{
|
|
"epoch": 2.0372899159663866,
|
|
"grad_norm": 7.499008840841319,
|
|
"learning_rate": 2.8258624118855704e-06,
|
|
"loss": 0.4080360233783722,
|
|
"step": 3879
|
|
},
|
|
{
|
|
"epoch": 2.03781512605042,
|
|
"grad_norm": 10.851118097126037,
|
|
"learning_rate": 2.8231108265112905e-06,
|
|
"loss": 0.43404507637023926,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 2.038340336134454,
|
|
"grad_norm": 10.549301939240816,
|
|
"learning_rate": 2.8203600543603116e-06,
|
|
"loss": 0.273033082485199,
|
|
"step": 3881
|
|
},
|
|
{
|
|
"epoch": 2.0388655462184873,
|
|
"grad_norm": 9.691882665058618,
|
|
"learning_rate": 2.817610096460243e-06,
|
|
"loss": 0.41321203112602234,
|
|
"step": 3882
|
|
},
|
|
{
|
|
"epoch": 2.039390756302521,
|
|
"grad_norm": 9.53414614050278,
|
|
"learning_rate": 2.814860953838389e-06,
|
|
"loss": 0.6547293066978455,
|
|
"step": 3883
|
|
},
|
|
{
|
|
"epoch": 2.0399159663865545,
|
|
"grad_norm": 11.465218009629313,
|
|
"learning_rate": 2.8121126275217514e-06,
|
|
"loss": 0.43598318099975586,
|
|
"step": 3884
|
|
},
|
|
{
|
|
"epoch": 2.0404411764705883,
|
|
"grad_norm": 16.61752688347944,
|
|
"learning_rate": 2.809365118537024e-06,
|
|
"loss": 1.627102255821228,
|
|
"step": 3885
|
|
},
|
|
{
|
|
"epoch": 2.0409663865546217,
|
|
"grad_norm": 14.211525186101717,
|
|
"learning_rate": 2.8066184279106e-06,
|
|
"loss": 1.2373404502868652,
|
|
"step": 3886
|
|
},
|
|
{
|
|
"epoch": 2.0414915966386555,
|
|
"grad_norm": 9.062227856513767,
|
|
"learning_rate": 2.8038725566685596e-06,
|
|
"loss": 0.1954503208398819,
|
|
"step": 3887
|
|
},
|
|
{
|
|
"epoch": 2.042016806722689,
|
|
"grad_norm": 13.707453553033107,
|
|
"learning_rate": 2.801127505836687e-06,
|
|
"loss": 0.5624500513076782,
|
|
"step": 3888
|
|
},
|
|
{
|
|
"epoch": 2.0425420168067228,
|
|
"grad_norm": 8.663227093017563,
|
|
"learning_rate": 2.7983832764404517e-06,
|
|
"loss": 0.6806732416152954,
|
|
"step": 3889
|
|
},
|
|
{
|
|
"epoch": 2.043067226890756,
|
|
"grad_norm": 10.834192493328237,
|
|
"learning_rate": 2.79563986950502e-06,
|
|
"loss": 0.5868386030197144,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 2.04359243697479,
|
|
"grad_norm": 11.192581350434342,
|
|
"learning_rate": 2.7928972860552482e-06,
|
|
"loss": 0.6238075494766235,
|
|
"step": 3891
|
|
},
|
|
{
|
|
"epoch": 2.0441176470588234,
|
|
"grad_norm": 12.320260608284606,
|
|
"learning_rate": 2.7901555271156887e-06,
|
|
"loss": 0.5835066437721252,
|
|
"step": 3892
|
|
},
|
|
{
|
|
"epoch": 2.044642857142857,
|
|
"grad_norm": 10.049967753868659,
|
|
"learning_rate": 2.787414593710583e-06,
|
|
"loss": 0.6015195846557617,
|
|
"step": 3893
|
|
},
|
|
{
|
|
"epoch": 2.0451680672268906,
|
|
"grad_norm": 13.849181672895295,
|
|
"learning_rate": 2.784674486863865e-06,
|
|
"loss": 0.6851204633712769,
|
|
"step": 3894
|
|
},
|
|
{
|
|
"epoch": 2.0456932773109244,
|
|
"grad_norm": 7.702221390228503,
|
|
"learning_rate": 2.781935207599158e-06,
|
|
"loss": 0.1196926012635231,
|
|
"step": 3895
|
|
},
|
|
{
|
|
"epoch": 2.046218487394958,
|
|
"grad_norm": 10.13467570800104,
|
|
"learning_rate": 2.7791967569397815e-06,
|
|
"loss": 0.35627737641334534,
|
|
"step": 3896
|
|
},
|
|
{
|
|
"epoch": 2.0467436974789917,
|
|
"grad_norm": 14.420159313353103,
|
|
"learning_rate": 2.7764591359087415e-06,
|
|
"loss": 0.3981572389602661,
|
|
"step": 3897
|
|
},
|
|
{
|
|
"epoch": 2.047268907563025,
|
|
"grad_norm": 12.415736623303292,
|
|
"learning_rate": 2.7737223455287343e-06,
|
|
"loss": 0.47580811381340027,
|
|
"step": 3898
|
|
},
|
|
{
|
|
"epoch": 2.047794117647059,
|
|
"grad_norm": 8.037988546145193,
|
|
"learning_rate": 2.770986386822145e-06,
|
|
"loss": 0.4026325047016144,
|
|
"step": 3899
|
|
},
|
|
{
|
|
"epoch": 2.0483193277310923,
|
|
"grad_norm": 12.677038294628572,
|
|
"learning_rate": 2.76825126081105e-06,
|
|
"loss": 0.589562177658081,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 2.048844537815126,
|
|
"grad_norm": 14.783707895557464,
|
|
"learning_rate": 2.7655169685172146e-06,
|
|
"loss": 1.1209787130355835,
|
|
"step": 3901
|
|
},
|
|
{
|
|
"epoch": 2.0493697478991595,
|
|
"grad_norm": 8.33452082081271,
|
|
"learning_rate": 2.7627835109620886e-06,
|
|
"loss": 0.2675451636314392,
|
|
"step": 3902
|
|
},
|
|
{
|
|
"epoch": 2.0498949579831933,
|
|
"grad_norm": 7.664460906222028,
|
|
"learning_rate": 2.760050889166818e-06,
|
|
"loss": 0.24929499626159668,
|
|
"step": 3903
|
|
},
|
|
{
|
|
"epoch": 2.0504201680672267,
|
|
"grad_norm": 9.846864581004077,
|
|
"learning_rate": 2.7573191041522297e-06,
|
|
"loss": 0.42551344633102417,
|
|
"step": 3904
|
|
},
|
|
{
|
|
"epoch": 2.0509453781512605,
|
|
"grad_norm": 8.909887491294871,
|
|
"learning_rate": 2.7545881569388404e-06,
|
|
"loss": 0.5701692700386047,
|
|
"step": 3905
|
|
},
|
|
{
|
|
"epoch": 2.051470588235294,
|
|
"grad_norm": 11.035772058186344,
|
|
"learning_rate": 2.751858048546853e-06,
|
|
"loss": 0.6184131503105164,
|
|
"step": 3906
|
|
},
|
|
{
|
|
"epoch": 2.0519957983193278,
|
|
"grad_norm": 12.446621976916722,
|
|
"learning_rate": 2.7491287799961586e-06,
|
|
"loss": 0.32249441742897034,
|
|
"step": 3907
|
|
},
|
|
{
|
|
"epoch": 2.052521008403361,
|
|
"grad_norm": 10.6240426881154,
|
|
"learning_rate": 2.746400352306332e-06,
|
|
"loss": 0.8945693969726562,
|
|
"step": 3908
|
|
},
|
|
{
|
|
"epoch": 2.053046218487395,
|
|
"grad_norm": 9.27758123897025,
|
|
"learning_rate": 2.7436727664966368e-06,
|
|
"loss": 0.6663864850997925,
|
|
"step": 3909
|
|
},
|
|
{
|
|
"epoch": 2.0535714285714284,
|
|
"grad_norm": 13.679583878748852,
|
|
"learning_rate": 2.7409460235860185e-06,
|
|
"loss": 0.34769487380981445,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 2.054096638655462,
|
|
"grad_norm": 11.696669671157458,
|
|
"learning_rate": 2.738220124593114e-06,
|
|
"loss": 0.38508152961730957,
|
|
"step": 3911
|
|
},
|
|
{
|
|
"epoch": 2.0546218487394956,
|
|
"grad_norm": 10.339399386513577,
|
|
"learning_rate": 2.7354950705362384e-06,
|
|
"loss": 0.8562803268432617,
|
|
"step": 3912
|
|
},
|
|
{
|
|
"epoch": 2.0551470588235294,
|
|
"grad_norm": 9.77377633192539,
|
|
"learning_rate": 2.7327708624333936e-06,
|
|
"loss": 0.4563913941383362,
|
|
"step": 3913
|
|
},
|
|
{
|
|
"epoch": 2.055672268907563,
|
|
"grad_norm": 11.50429196068152,
|
|
"learning_rate": 2.7300475013022666e-06,
|
|
"loss": 0.8931875824928284,
|
|
"step": 3914
|
|
},
|
|
{
|
|
"epoch": 2.0561974789915967,
|
|
"grad_norm": 8.247738013539651,
|
|
"learning_rate": 2.7273249881602257e-06,
|
|
"loss": 0.39803647994995117,
|
|
"step": 3915
|
|
},
|
|
{
|
|
"epoch": 2.05672268907563,
|
|
"grad_norm": 11.481771666631674,
|
|
"learning_rate": 2.7246033240243246e-06,
|
|
"loss": 0.6777544021606445,
|
|
"step": 3916
|
|
},
|
|
{
|
|
"epoch": 2.057247899159664,
|
|
"grad_norm": 6.61136629234512,
|
|
"learning_rate": 2.7218825099112966e-06,
|
|
"loss": 0.31973081827163696,
|
|
"step": 3917
|
|
},
|
|
{
|
|
"epoch": 2.0577731092436973,
|
|
"grad_norm": 13.368749614123901,
|
|
"learning_rate": 2.7191625468375636e-06,
|
|
"loss": 0.47313475608825684,
|
|
"step": 3918
|
|
},
|
|
{
|
|
"epoch": 2.058298319327731,
|
|
"grad_norm": 14.178979557281286,
|
|
"learning_rate": 2.7164434358192236e-06,
|
|
"loss": 0.42458271980285645,
|
|
"step": 3919
|
|
},
|
|
{
|
|
"epoch": 2.0588235294117645,
|
|
"grad_norm": 16.269726023071627,
|
|
"learning_rate": 2.7137251778720587e-06,
|
|
"loss": 0.7796242237091064,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 2.0593487394957983,
|
|
"grad_norm": 13.820851522676165,
|
|
"learning_rate": 2.7110077740115315e-06,
|
|
"loss": 0.3527318239212036,
|
|
"step": 3921
|
|
},
|
|
{
|
|
"epoch": 2.0598739495798317,
|
|
"grad_norm": 11.202078971691062,
|
|
"learning_rate": 2.708291225252787e-06,
|
|
"loss": 0.36367225646972656,
|
|
"step": 3922
|
|
},
|
|
{
|
|
"epoch": 2.0603991596638656,
|
|
"grad_norm": 10.148155009117348,
|
|
"learning_rate": 2.705575532610649e-06,
|
|
"loss": 0.47870922088623047,
|
|
"step": 3923
|
|
},
|
|
{
|
|
"epoch": 2.060924369747899,
|
|
"grad_norm": 6.3429649055401764,
|
|
"learning_rate": 2.702860697099621e-06,
|
|
"loss": 0.24576593935489655,
|
|
"step": 3924
|
|
},
|
|
{
|
|
"epoch": 2.0614495798319328,
|
|
"grad_norm": 7.984679998337307,
|
|
"learning_rate": 2.7001467197338905e-06,
|
|
"loss": 0.3550044894218445,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 2.0619747899159666,
|
|
"grad_norm": 8.67987105281844,
|
|
"learning_rate": 2.6974336015273204e-06,
|
|
"loss": 0.32794153690338135,
|
|
"step": 3926
|
|
},
|
|
{
|
|
"epoch": 2.0625,
|
|
"grad_norm": 8.162135256358189,
|
|
"learning_rate": 2.6947213434934537e-06,
|
|
"loss": 0.3276750147342682,
|
|
"step": 3927
|
|
},
|
|
{
|
|
"epoch": 2.0630252100840334,
|
|
"grad_norm": 14.703477596563303,
|
|
"learning_rate": 2.692009946645513e-06,
|
|
"loss": 0.5918416976928711,
|
|
"step": 3928
|
|
},
|
|
{
|
|
"epoch": 2.0635504201680672,
|
|
"grad_norm": 7.921644943925436,
|
|
"learning_rate": 2.6892994119963965e-06,
|
|
"loss": 0.5117301940917969,
|
|
"step": 3929
|
|
},
|
|
{
|
|
"epoch": 2.064075630252101,
|
|
"grad_norm": 8.833998811344054,
|
|
"learning_rate": 2.686589740558683e-06,
|
|
"loss": 0.325096070766449,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 2.0646008403361344,
|
|
"grad_norm": 14.840585915945963,
|
|
"learning_rate": 2.683880933344628e-06,
|
|
"loss": 0.8907138109207153,
|
|
"step": 3931
|
|
},
|
|
{
|
|
"epoch": 2.0651260504201683,
|
|
"grad_norm": 12.389823698067618,
|
|
"learning_rate": 2.681172991366162e-06,
|
|
"loss": 0.7546146512031555,
|
|
"step": 3932
|
|
},
|
|
{
|
|
"epoch": 2.0656512605042017,
|
|
"grad_norm": 11.39509040830051,
|
|
"learning_rate": 2.678465915634899e-06,
|
|
"loss": 0.39673149585723877,
|
|
"step": 3933
|
|
},
|
|
{
|
|
"epoch": 2.0661764705882355,
|
|
"grad_norm": 13.020760188029561,
|
|
"learning_rate": 2.675759707162122e-06,
|
|
"loss": 0.6929548978805542,
|
|
"step": 3934
|
|
},
|
|
{
|
|
"epoch": 2.066701680672269,
|
|
"grad_norm": 7.541337829367985,
|
|
"learning_rate": 2.6730543669587927e-06,
|
|
"loss": 0.5798717737197876,
|
|
"step": 3935
|
|
},
|
|
{
|
|
"epoch": 2.0672268907563027,
|
|
"grad_norm": 11.399322480534979,
|
|
"learning_rate": 2.670349896035549e-06,
|
|
"loss": 1.1540753841400146,
|
|
"step": 3936
|
|
},
|
|
{
|
|
"epoch": 2.067752100840336,
|
|
"grad_norm": 7.5590983302190375,
|
|
"learning_rate": 2.667646295402704e-06,
|
|
"loss": 0.31627383828163147,
|
|
"step": 3937
|
|
},
|
|
{
|
|
"epoch": 2.06827731092437,
|
|
"grad_norm": 16.506451660048146,
|
|
"learning_rate": 2.664943566070244e-06,
|
|
"loss": 0.3912052810192108,
|
|
"step": 3938
|
|
},
|
|
{
|
|
"epoch": 2.0688025210084033,
|
|
"grad_norm": 17.581512777706713,
|
|
"learning_rate": 2.6622417090478304e-06,
|
|
"loss": 0.6730505228042603,
|
|
"step": 3939
|
|
},
|
|
{
|
|
"epoch": 2.069327731092437,
|
|
"grad_norm": 9.1182455516819,
|
|
"learning_rate": 2.6595407253448025e-06,
|
|
"loss": 0.7248634099960327,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 2.0698529411764706,
|
|
"grad_norm": 10.391271026062176,
|
|
"learning_rate": 2.656840615970169e-06,
|
|
"loss": 0.5894888043403625,
|
|
"step": 3941
|
|
},
|
|
{
|
|
"epoch": 2.0703781512605044,
|
|
"grad_norm": 6.29550669571111,
|
|
"learning_rate": 2.6541413819326123e-06,
|
|
"loss": 0.5516920685768127,
|
|
"step": 3942
|
|
},
|
|
{
|
|
"epoch": 2.070903361344538,
|
|
"grad_norm": 8.091673595762389,
|
|
"learning_rate": 2.651443024240489e-06,
|
|
"loss": 0.7397844195365906,
|
|
"step": 3943
|
|
},
|
|
{
|
|
"epoch": 2.0714285714285716,
|
|
"grad_norm": 9.549603541141096,
|
|
"learning_rate": 2.6487455439018295e-06,
|
|
"loss": 0.4368700385093689,
|
|
"step": 3944
|
|
},
|
|
{
|
|
"epoch": 2.071953781512605,
|
|
"grad_norm": 9.408959892705115,
|
|
"learning_rate": 2.646048941924333e-06,
|
|
"loss": 0.4290487766265869,
|
|
"step": 3945
|
|
},
|
|
{
|
|
"epoch": 2.072478991596639,
|
|
"grad_norm": 9.45932152508389,
|
|
"learning_rate": 2.643353219315374e-06,
|
|
"loss": 0.44970598816871643,
|
|
"step": 3946
|
|
},
|
|
{
|
|
"epoch": 2.0730042016806722,
|
|
"grad_norm": 8.10109812165019,
|
|
"learning_rate": 2.6406583770819946e-06,
|
|
"loss": 0.6798231601715088,
|
|
"step": 3947
|
|
},
|
|
{
|
|
"epoch": 2.073529411764706,
|
|
"grad_norm": 10.04046574807137,
|
|
"learning_rate": 2.6379644162309135e-06,
|
|
"loss": 0.44904816150665283,
|
|
"step": 3948
|
|
},
|
|
{
|
|
"epoch": 2.0740546218487395,
|
|
"grad_norm": 11.184632817122909,
|
|
"learning_rate": 2.635271337768517e-06,
|
|
"loss": 0.5745569467544556,
|
|
"step": 3949
|
|
},
|
|
{
|
|
"epoch": 2.0745798319327733,
|
|
"grad_norm": 31.641268530029024,
|
|
"learning_rate": 2.632579142700861e-06,
|
|
"loss": 0.8730089664459229,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 2.0751050420168067,
|
|
"grad_norm": 12.92673192821059,
|
|
"learning_rate": 2.629887832033672e-06,
|
|
"loss": 0.32680264115333557,
|
|
"step": 3951
|
|
},
|
|
{
|
|
"epoch": 2.0756302521008405,
|
|
"grad_norm": 13.976098941662803,
|
|
"learning_rate": 2.6271974067723464e-06,
|
|
"loss": 0.3138732314109802,
|
|
"step": 3952
|
|
},
|
|
{
|
|
"epoch": 2.076155462184874,
|
|
"grad_norm": 12.995950302631732,
|
|
"learning_rate": 2.6245078679219503e-06,
|
|
"loss": 0.6802486777305603,
|
|
"step": 3953
|
|
},
|
|
{
|
|
"epoch": 2.0766806722689077,
|
|
"grad_norm": 8.30181281232033,
|
|
"learning_rate": 2.621819216487216e-06,
|
|
"loss": 0.7510164976119995,
|
|
"step": 3954
|
|
},
|
|
{
|
|
"epoch": 2.077205882352941,
|
|
"grad_norm": 14.158626810720211,
|
|
"learning_rate": 2.619131453472551e-06,
|
|
"loss": 0.47994887828826904,
|
|
"step": 3955
|
|
},
|
|
{
|
|
"epoch": 2.077731092436975,
|
|
"grad_norm": 32.55403575755407,
|
|
"learning_rate": 2.6164445798820235e-06,
|
|
"loss": 0.6356985569000244,
|
|
"step": 3956
|
|
},
|
|
{
|
|
"epoch": 2.0782563025210083,
|
|
"grad_norm": 11.353833429346354,
|
|
"learning_rate": 2.613758596719373e-06,
|
|
"loss": 0.7022428512573242,
|
|
"step": 3957
|
|
},
|
|
{
|
|
"epoch": 2.078781512605042,
|
|
"grad_norm": 10.912176236666568,
|
|
"learning_rate": 2.6110735049880054e-06,
|
|
"loss": 0.9900159239768982,
|
|
"step": 3958
|
|
},
|
|
{
|
|
"epoch": 2.0793067226890756,
|
|
"grad_norm": 12.037717700323714,
|
|
"learning_rate": 2.6083893056909937e-06,
|
|
"loss": 0.3570025563240051,
|
|
"step": 3959
|
|
},
|
|
{
|
|
"epoch": 2.0798319327731094,
|
|
"grad_norm": 10.470781339000474,
|
|
"learning_rate": 2.6057059998310775e-06,
|
|
"loss": 0.5455389022827148,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 2.080357142857143,
|
|
"grad_norm": 14.065778481096347,
|
|
"learning_rate": 2.603023588410662e-06,
|
|
"loss": 0.4174801707267761,
|
|
"step": 3961
|
|
},
|
|
{
|
|
"epoch": 2.0808823529411766,
|
|
"grad_norm": 15.10690979874097,
|
|
"learning_rate": 2.6003420724318217e-06,
|
|
"loss": 0.8283196091651917,
|
|
"step": 3962
|
|
},
|
|
{
|
|
"epoch": 2.08140756302521,
|
|
"grad_norm": 6.956561241135267,
|
|
"learning_rate": 2.597661452896293e-06,
|
|
"loss": 0.2815828323364258,
|
|
"step": 3963
|
|
},
|
|
{
|
|
"epoch": 2.081932773109244,
|
|
"grad_norm": 8.890531207086603,
|
|
"learning_rate": 2.594981730805478e-06,
|
|
"loss": 0.5109238624572754,
|
|
"step": 3964
|
|
},
|
|
{
|
|
"epoch": 2.0824579831932772,
|
|
"grad_norm": 10.572296459698237,
|
|
"learning_rate": 2.5923029071604443e-06,
|
|
"loss": 0.5378514528274536,
|
|
"step": 3965
|
|
},
|
|
{
|
|
"epoch": 2.082983193277311,
|
|
"grad_norm": 19.01366643481593,
|
|
"learning_rate": 2.589624982961923e-06,
|
|
"loss": 0.4682660698890686,
|
|
"step": 3966
|
|
},
|
|
{
|
|
"epoch": 2.0835084033613445,
|
|
"grad_norm": 16.041941215634868,
|
|
"learning_rate": 2.5869479592103096e-06,
|
|
"loss": 0.8170668482780457,
|
|
"step": 3967
|
|
},
|
|
{
|
|
"epoch": 2.0840336134453783,
|
|
"grad_norm": 14.90220450351714,
|
|
"learning_rate": 2.584271836905664e-06,
|
|
"loss": 2.5968964099884033,
|
|
"step": 3968
|
|
},
|
|
{
|
|
"epoch": 2.0845588235294117,
|
|
"grad_norm": 10.322535502652263,
|
|
"learning_rate": 2.5815966170477065e-06,
|
|
"loss": 0.36919355392456055,
|
|
"step": 3969
|
|
},
|
|
{
|
|
"epoch": 2.0850840336134455,
|
|
"grad_norm": 12.3053493625888,
|
|
"learning_rate": 2.5789223006358257e-06,
|
|
"loss": 0.52881920337677,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 2.085609243697479,
|
|
"grad_norm": 10.821405748592051,
|
|
"learning_rate": 2.5762488886690682e-06,
|
|
"loss": 0.5289602279663086,
|
|
"step": 3971
|
|
},
|
|
{
|
|
"epoch": 2.0861344537815127,
|
|
"grad_norm": 9.47215839458051,
|
|
"learning_rate": 2.5735763821461436e-06,
|
|
"loss": 0.5808546543121338,
|
|
"step": 3972
|
|
},
|
|
{
|
|
"epoch": 2.086659663865546,
|
|
"grad_norm": 21.32001531849674,
|
|
"learning_rate": 2.5709047820654236e-06,
|
|
"loss": 2.2433700561523438,
|
|
"step": 3973
|
|
},
|
|
{
|
|
"epoch": 2.08718487394958,
|
|
"grad_norm": 10.004597607684564,
|
|
"learning_rate": 2.5682340894249413e-06,
|
|
"loss": 0.4893033802509308,
|
|
"step": 3974
|
|
},
|
|
{
|
|
"epoch": 2.0877100840336134,
|
|
"grad_norm": 8.013118652559115,
|
|
"learning_rate": 2.5655643052223915e-06,
|
|
"loss": 0.24237364530563354,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 2.088235294117647,
|
|
"grad_norm": 10.646044773671434,
|
|
"learning_rate": 2.5628954304551263e-06,
|
|
"loss": 0.8329358100891113,
|
|
"step": 3976
|
|
},
|
|
{
|
|
"epoch": 2.0887605042016806,
|
|
"grad_norm": 12.6636279499299,
|
|
"learning_rate": 2.5602274661201643e-06,
|
|
"loss": 0.3509142994880676,
|
|
"step": 3977
|
|
},
|
|
{
|
|
"epoch": 2.0892857142857144,
|
|
"grad_norm": 7.382820939104313,
|
|
"learning_rate": 2.5575604132141792e-06,
|
|
"loss": 0.6189857721328735,
|
|
"step": 3978
|
|
},
|
|
{
|
|
"epoch": 2.089810924369748,
|
|
"grad_norm": 9.669628833459093,
|
|
"learning_rate": 2.5548942727335046e-06,
|
|
"loss": 0.6151308417320251,
|
|
"step": 3979
|
|
},
|
|
{
|
|
"epoch": 2.0903361344537816,
|
|
"grad_norm": 13.405455885657206,
|
|
"learning_rate": 2.552229045674135e-06,
|
|
"loss": 0.37452220916748047,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 2.090861344537815,
|
|
"grad_norm": 7.3374594189967866,
|
|
"learning_rate": 2.549564733031722e-06,
|
|
"loss": 0.7517160773277283,
|
|
"step": 3981
|
|
},
|
|
{
|
|
"epoch": 2.091386554621849,
|
|
"grad_norm": 8.72194658983393,
|
|
"learning_rate": 2.5469013358015765e-06,
|
|
"loss": 0.4801388680934906,
|
|
"step": 3982
|
|
},
|
|
{
|
|
"epoch": 2.0919117647058822,
|
|
"grad_norm": 7.948986903005862,
|
|
"learning_rate": 2.5442388549786668e-06,
|
|
"loss": 0.08267168700695038,
|
|
"step": 3983
|
|
},
|
|
{
|
|
"epoch": 2.092436974789916,
|
|
"grad_norm": 20.216837402667025,
|
|
"learning_rate": 2.5415772915576186e-06,
|
|
"loss": 0.5310105681419373,
|
|
"step": 3984
|
|
},
|
|
{
|
|
"epoch": 2.0929621848739495,
|
|
"grad_norm": 8.345302185716593,
|
|
"learning_rate": 2.538916646532718e-06,
|
|
"loss": 0.5747807025909424,
|
|
"step": 3985
|
|
},
|
|
{
|
|
"epoch": 2.0934873949579833,
|
|
"grad_norm": 14.033375458667514,
|
|
"learning_rate": 2.536256920897905e-06,
|
|
"loss": 0.3640735149383545,
|
|
"step": 3986
|
|
},
|
|
{
|
|
"epoch": 2.0940126050420167,
|
|
"grad_norm": 9.844116766559278,
|
|
"learning_rate": 2.5335981156467755e-06,
|
|
"loss": 0.4546549916267395,
|
|
"step": 3987
|
|
},
|
|
{
|
|
"epoch": 2.0945378151260505,
|
|
"grad_norm": 10.084119867347672,
|
|
"learning_rate": 2.530940231772584e-06,
|
|
"loss": 0.3367902636528015,
|
|
"step": 3988
|
|
},
|
|
{
|
|
"epoch": 2.095063025210084,
|
|
"grad_norm": 11.81220005335899,
|
|
"learning_rate": 2.528283270268238e-06,
|
|
"loss": 0.48261862993240356,
|
|
"step": 3989
|
|
},
|
|
{
|
|
"epoch": 2.0955882352941178,
|
|
"grad_norm": 8.260811024275789,
|
|
"learning_rate": 2.5256272321263037e-06,
|
|
"loss": 0.5103731155395508,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 2.096113445378151,
|
|
"grad_norm": 10.929352655377272,
|
|
"learning_rate": 2.5229721183389975e-06,
|
|
"loss": 0.3871062994003296,
|
|
"step": 3991
|
|
},
|
|
{
|
|
"epoch": 2.096638655462185,
|
|
"grad_norm": 13.228202794028162,
|
|
"learning_rate": 2.5203179298981996e-06,
|
|
"loss": 0.9053060412406921,
|
|
"step": 3992
|
|
},
|
|
{
|
|
"epoch": 2.0971638655462184,
|
|
"grad_norm": 8.237409376816085,
|
|
"learning_rate": 2.517664667795434e-06,
|
|
"loss": 0.64473956823349,
|
|
"step": 3993
|
|
},
|
|
{
|
|
"epoch": 2.097689075630252,
|
|
"grad_norm": 9.071831782893424,
|
|
"learning_rate": 2.5150123330218846e-06,
|
|
"loss": 0.22496303915977478,
|
|
"step": 3994
|
|
},
|
|
{
|
|
"epoch": 2.0982142857142856,
|
|
"grad_norm": 24.95086648414235,
|
|
"learning_rate": 2.512360926568388e-06,
|
|
"loss": 1.565203309059143,
|
|
"step": 3995
|
|
},
|
|
{
|
|
"epoch": 2.0987394957983194,
|
|
"grad_norm": 12.279797914154937,
|
|
"learning_rate": 2.509710449425432e-06,
|
|
"loss": 0.6211872100830078,
|
|
"step": 3996
|
|
},
|
|
{
|
|
"epoch": 2.099264705882353,
|
|
"grad_norm": 7.942257640316419,
|
|
"learning_rate": 2.5070609025831605e-06,
|
|
"loss": 0.6003269553184509,
|
|
"step": 3997
|
|
},
|
|
{
|
|
"epoch": 2.0997899159663866,
|
|
"grad_norm": 13.367055926203495,
|
|
"learning_rate": 2.5044122870313647e-06,
|
|
"loss": 0.5250446200370789,
|
|
"step": 3998
|
|
},
|
|
{
|
|
"epoch": 2.10031512605042,
|
|
"grad_norm": 14.042664302189854,
|
|
"learning_rate": 2.5017646037594967e-06,
|
|
"loss": 0.4142884314060211,
|
|
"step": 3999
|
|
},
|
|
{
|
|
"epoch": 2.100840336134454,
|
|
"grad_norm": 11.8651253965398,
|
|
"learning_rate": 2.4991178537566514e-06,
|
|
"loss": 0.5125176310539246,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 2.1013655462184873,
|
|
"grad_norm": 11.943873585595433,
|
|
"learning_rate": 2.49647203801158e-06,
|
|
"loss": 0.7816375494003296,
|
|
"step": 4001
|
|
},
|
|
{
|
|
"epoch": 2.101890756302521,
|
|
"grad_norm": 21.191938443320442,
|
|
"learning_rate": 2.4938271575126827e-06,
|
|
"loss": 0.5782123804092407,
|
|
"step": 4002
|
|
},
|
|
{
|
|
"epoch": 2.1024159663865545,
|
|
"grad_norm": 8.34295660762861,
|
|
"learning_rate": 2.4911832132480108e-06,
|
|
"loss": 0.4950450658798218,
|
|
"step": 4003
|
|
},
|
|
{
|
|
"epoch": 2.1029411764705883,
|
|
"grad_norm": 11.826622056411919,
|
|
"learning_rate": 2.4885402062052665e-06,
|
|
"loss": 0.5004026889801025,
|
|
"step": 4004
|
|
},
|
|
{
|
|
"epoch": 2.1034663865546217,
|
|
"grad_norm": 12.242468251597597,
|
|
"learning_rate": 2.4858981373718006e-06,
|
|
"loss": 0.3473156690597534,
|
|
"step": 4005
|
|
},
|
|
{
|
|
"epoch": 2.1039915966386555,
|
|
"grad_norm": 10.386770019916467,
|
|
"learning_rate": 2.483257007734613e-06,
|
|
"loss": 0.9702704548835754,
|
|
"step": 4006
|
|
},
|
|
{
|
|
"epoch": 2.104516806722689,
|
|
"grad_norm": 17.081894558842556,
|
|
"learning_rate": 2.480616818280357e-06,
|
|
"loss": 0.6458576917648315,
|
|
"step": 4007
|
|
},
|
|
{
|
|
"epoch": 2.1050420168067228,
|
|
"grad_norm": 9.706915956311411,
|
|
"learning_rate": 2.4779775699953303e-06,
|
|
"loss": 0.6217638254165649,
|
|
"step": 4008
|
|
},
|
|
{
|
|
"epoch": 2.105567226890756,
|
|
"grad_norm": 12.262829190544315,
|
|
"learning_rate": 2.47533926386548e-06,
|
|
"loss": 0.5689232349395752,
|
|
"step": 4009
|
|
},
|
|
{
|
|
"epoch": 2.10609243697479,
|
|
"grad_norm": 16.265738021721162,
|
|
"learning_rate": 2.472701900876402e-06,
|
|
"loss": 0.7930733561515808,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 2.1066176470588234,
|
|
"grad_norm": 13.850093582157232,
|
|
"learning_rate": 2.4700654820133385e-06,
|
|
"loss": 0.6801016330718994,
|
|
"step": 4011
|
|
},
|
|
{
|
|
"epoch": 2.107142857142857,
|
|
"grad_norm": 13.226655277762632,
|
|
"learning_rate": 2.4674300082611804e-06,
|
|
"loss": 0.48762497305870056,
|
|
"step": 4012
|
|
},
|
|
{
|
|
"epoch": 2.1076680672268906,
|
|
"grad_norm": 13.586369828045024,
|
|
"learning_rate": 2.4647954806044633e-06,
|
|
"loss": 0.5265434980392456,
|
|
"step": 4013
|
|
},
|
|
{
|
|
"epoch": 2.1081932773109244,
|
|
"grad_norm": 12.633515021762046,
|
|
"learning_rate": 2.4621619000273746e-06,
|
|
"loss": 0.33787739276885986,
|
|
"step": 4014
|
|
},
|
|
{
|
|
"epoch": 2.108718487394958,
|
|
"grad_norm": 10.953664180727404,
|
|
"learning_rate": 2.4595292675137427e-06,
|
|
"loss": 0.8921220898628235,
|
|
"step": 4015
|
|
},
|
|
{
|
|
"epoch": 2.1092436974789917,
|
|
"grad_norm": 12.859362953856833,
|
|
"learning_rate": 2.4568975840470432e-06,
|
|
"loss": 0.3590652644634247,
|
|
"step": 4016
|
|
},
|
|
{
|
|
"epoch": 2.109768907563025,
|
|
"grad_norm": 10.683680488236497,
|
|
"learning_rate": 2.454266850610398e-06,
|
|
"loss": 0.5140031576156616,
|
|
"step": 4017
|
|
},
|
|
{
|
|
"epoch": 2.110294117647059,
|
|
"grad_norm": 13.1882645947248,
|
|
"learning_rate": 2.451637068186573e-06,
|
|
"loss": 0.6276907920837402,
|
|
"step": 4018
|
|
},
|
|
{
|
|
"epoch": 2.1108193277310923,
|
|
"grad_norm": 14.307210559448931,
|
|
"learning_rate": 2.4490082377579805e-06,
|
|
"loss": 0.5477755069732666,
|
|
"step": 4019
|
|
},
|
|
{
|
|
"epoch": 2.111344537815126,
|
|
"grad_norm": 8.849757383111541,
|
|
"learning_rate": 2.4463803603066745e-06,
|
|
"loss": 0.45354244112968445,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 2.1118697478991595,
|
|
"grad_norm": 28.449778496920423,
|
|
"learning_rate": 2.443753436814354e-06,
|
|
"loss": 0.5317434668540955,
|
|
"step": 4021
|
|
},
|
|
{
|
|
"epoch": 2.1123949579831933,
|
|
"grad_norm": 12.821823751423944,
|
|
"learning_rate": 2.4411274682623654e-06,
|
|
"loss": 0.5347519516944885,
|
|
"step": 4022
|
|
},
|
|
{
|
|
"epoch": 2.1129201680672267,
|
|
"grad_norm": 11.66289792100645,
|
|
"learning_rate": 2.4385024556316928e-06,
|
|
"loss": 0.3007144629955292,
|
|
"step": 4023
|
|
},
|
|
{
|
|
"epoch": 2.1134453781512605,
|
|
"grad_norm": 15.17286235663046,
|
|
"learning_rate": 2.4358783999029667e-06,
|
|
"loss": 0.960580587387085,
|
|
"step": 4024
|
|
},
|
|
{
|
|
"epoch": 2.113970588235294,
|
|
"grad_norm": 19.732052043929922,
|
|
"learning_rate": 2.433255302056458e-06,
|
|
"loss": 1.1223571300506592,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 2.1144957983193278,
|
|
"grad_norm": 14.03611980370771,
|
|
"learning_rate": 2.4306331630720814e-06,
|
|
"loss": 1.2803820371627808,
|
|
"step": 4026
|
|
},
|
|
{
|
|
"epoch": 2.115021008403361,
|
|
"grad_norm": 10.570645410263367,
|
|
"learning_rate": 2.428011983929391e-06,
|
|
"loss": 0.344987154006958,
|
|
"step": 4027
|
|
},
|
|
{
|
|
"epoch": 2.115546218487395,
|
|
"grad_norm": 9.515052114336722,
|
|
"learning_rate": 2.425391765607587e-06,
|
|
"loss": 0.7571660280227661,
|
|
"step": 4028
|
|
},
|
|
{
|
|
"epoch": 2.1160714285714284,
|
|
"grad_norm": 22.800482756931466,
|
|
"learning_rate": 2.4227725090855063e-06,
|
|
"loss": 0.772080659866333,
|
|
"step": 4029
|
|
},
|
|
{
|
|
"epoch": 2.116596638655462,
|
|
"grad_norm": 13.531342528451523,
|
|
"learning_rate": 2.4201542153416264e-06,
|
|
"loss": 0.3090960383415222,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 2.1171218487394956,
|
|
"grad_norm": 8.453552692084463,
|
|
"learning_rate": 2.41753688535407e-06,
|
|
"loss": 0.36753419041633606,
|
|
"step": 4031
|
|
},
|
|
{
|
|
"epoch": 2.1176470588235294,
|
|
"grad_norm": 13.202252424696026,
|
|
"learning_rate": 2.4149205201005943e-06,
|
|
"loss": 0.3277924060821533,
|
|
"step": 4032
|
|
},
|
|
{
|
|
"epoch": 2.118172268907563,
|
|
"grad_norm": 9.125245980593691,
|
|
"learning_rate": 2.412305120558599e-06,
|
|
"loss": 0.8689071536064148,
|
|
"step": 4033
|
|
},
|
|
{
|
|
"epoch": 2.1186974789915967,
|
|
"grad_norm": 13.216372617169977,
|
|
"learning_rate": 2.4096906877051217e-06,
|
|
"loss": 0.2384854257106781,
|
|
"step": 4034
|
|
},
|
|
{
|
|
"epoch": 2.11922268907563,
|
|
"grad_norm": 8.202640191300103,
|
|
"learning_rate": 2.40707722251684e-06,
|
|
"loss": 0.42244455218315125,
|
|
"step": 4035
|
|
},
|
|
{
|
|
"epoch": 2.119747899159664,
|
|
"grad_norm": 11.09092881260831,
|
|
"learning_rate": 2.4044647259700686e-06,
|
|
"loss": 0.6288996934890747,
|
|
"step": 4036
|
|
},
|
|
{
|
|
"epoch": 2.1202731092436973,
|
|
"grad_norm": 10.894244829347828,
|
|
"learning_rate": 2.40185319904076e-06,
|
|
"loss": 0.8319982290267944,
|
|
"step": 4037
|
|
},
|
|
{
|
|
"epoch": 2.120798319327731,
|
|
"grad_norm": 9.945751874907838,
|
|
"learning_rate": 2.3992426427045096e-06,
|
|
"loss": 0.2343321442604065,
|
|
"step": 4038
|
|
},
|
|
{
|
|
"epoch": 2.1213235294117645,
|
|
"grad_norm": 10.185372354706145,
|
|
"learning_rate": 2.3966330579365436e-06,
|
|
"loss": 0.3003634810447693,
|
|
"step": 4039
|
|
},
|
|
{
|
|
"epoch": 2.1218487394957983,
|
|
"grad_norm": 10.997731967945157,
|
|
"learning_rate": 2.3940244457117295e-06,
|
|
"loss": 0.41671162843704224,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 2.1223739495798317,
|
|
"grad_norm": 19.279236304933864,
|
|
"learning_rate": 2.391416807004568e-06,
|
|
"loss": 0.6006914377212524,
|
|
"step": 4041
|
|
},
|
|
{
|
|
"epoch": 2.1228991596638656,
|
|
"grad_norm": 10.69244743177686,
|
|
"learning_rate": 2.3888101427891997e-06,
|
|
"loss": 0.3275656998157501,
|
|
"step": 4042
|
|
},
|
|
{
|
|
"epoch": 2.123424369747899,
|
|
"grad_norm": 7.268202003336679,
|
|
"learning_rate": 2.386204454039398e-06,
|
|
"loss": 0.17830371856689453,
|
|
"step": 4043
|
|
},
|
|
{
|
|
"epoch": 2.1239495798319328,
|
|
"grad_norm": 19.345509501951057,
|
|
"learning_rate": 2.3835997417285726e-06,
|
|
"loss": 0.5082678198814392,
|
|
"step": 4044
|
|
},
|
|
{
|
|
"epoch": 2.1244747899159666,
|
|
"grad_norm": 12.21970607249505,
|
|
"learning_rate": 2.3809960068297732e-06,
|
|
"loss": 0.5014134645462036,
|
|
"step": 4045
|
|
},
|
|
{
|
|
"epoch": 2.125,
|
|
"grad_norm": 11.723162376756218,
|
|
"learning_rate": 2.3783932503156776e-06,
|
|
"loss": 0.44408175349235535,
|
|
"step": 4046
|
|
},
|
|
{
|
|
"epoch": 2.1255252100840334,
|
|
"grad_norm": 11.893135065516487,
|
|
"learning_rate": 2.375791473158602e-06,
|
|
"loss": 0.3746313154697418,
|
|
"step": 4047
|
|
},
|
|
{
|
|
"epoch": 2.1260504201680672,
|
|
"grad_norm": 15.624052700547455,
|
|
"learning_rate": 2.373190676330494e-06,
|
|
"loss": 0.2957392930984497,
|
|
"step": 4048
|
|
},
|
|
{
|
|
"epoch": 2.126575630252101,
|
|
"grad_norm": 9.96269591888791,
|
|
"learning_rate": 2.370590860802938e-06,
|
|
"loss": 0.48576468229293823,
|
|
"step": 4049
|
|
},
|
|
{
|
|
"epoch": 2.1271008403361344,
|
|
"grad_norm": 13.965649108367241,
|
|
"learning_rate": 2.3679920275471507e-06,
|
|
"loss": 0.9501616358757019,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 2.127626050420168,
|
|
"grad_norm": 12.50214804848332,
|
|
"learning_rate": 2.3653941775339808e-06,
|
|
"loss": 0.8665844202041626,
|
|
"step": 4051
|
|
},
|
|
{
|
|
"epoch": 2.1281512605042017,
|
|
"grad_norm": 10.905922472614874,
|
|
"learning_rate": 2.362797311733909e-06,
|
|
"loss": 0.4295923113822937,
|
|
"step": 4052
|
|
},
|
|
{
|
|
"epoch": 2.1286764705882355,
|
|
"grad_norm": 11.523552109263122,
|
|
"learning_rate": 2.3602014311170524e-06,
|
|
"loss": 0.8264093995094299,
|
|
"step": 4053
|
|
},
|
|
{
|
|
"epoch": 2.129201680672269,
|
|
"grad_norm": 16.55913585959276,
|
|
"learning_rate": 2.3576065366531574e-06,
|
|
"loss": 0.42858362197875977,
|
|
"step": 4054
|
|
},
|
|
{
|
|
"epoch": 2.1297268907563027,
|
|
"grad_norm": 9.302406598385065,
|
|
"learning_rate": 2.355012629311601e-06,
|
|
"loss": 0.26310187578201294,
|
|
"step": 4055
|
|
},
|
|
{
|
|
"epoch": 2.130252100840336,
|
|
"grad_norm": 11.31262283218092,
|
|
"learning_rate": 2.3524197100613928e-06,
|
|
"loss": 0.4573161005973816,
|
|
"step": 4056
|
|
},
|
|
{
|
|
"epoch": 2.13077731092437,
|
|
"grad_norm": 9.631517556209394,
|
|
"learning_rate": 2.3498277798711725e-06,
|
|
"loss": 0.34721511602401733,
|
|
"step": 4057
|
|
},
|
|
{
|
|
"epoch": 2.1313025210084033,
|
|
"grad_norm": 14.51207449185493,
|
|
"learning_rate": 2.3472368397092123e-06,
|
|
"loss": 0.6342383027076721,
|
|
"step": 4058
|
|
},
|
|
{
|
|
"epoch": 2.131827731092437,
|
|
"grad_norm": 14.037058443061177,
|
|
"learning_rate": 2.3446468905434093e-06,
|
|
"loss": 0.6850436925888062,
|
|
"step": 4059
|
|
},
|
|
{
|
|
"epoch": 2.1323529411764706,
|
|
"grad_norm": 14.932047042158299,
|
|
"learning_rate": 2.342057933341299e-06,
|
|
"loss": 0.3733258843421936,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 2.1328781512605044,
|
|
"grad_norm": 14.33288445780609,
|
|
"learning_rate": 2.3394699690700395e-06,
|
|
"loss": 1.062628984451294,
|
|
"step": 4061
|
|
},
|
|
{
|
|
"epoch": 2.133403361344538,
|
|
"grad_norm": 11.434354174339296,
|
|
"learning_rate": 2.33688299869642e-06,
|
|
"loss": 0.665610134601593,
|
|
"step": 4062
|
|
},
|
|
{
|
|
"epoch": 2.1339285714285716,
|
|
"grad_norm": 12.173854876245024,
|
|
"learning_rate": 2.3342970231868573e-06,
|
|
"loss": 0.3131663203239441,
|
|
"step": 4063
|
|
},
|
|
{
|
|
"epoch": 2.134453781512605,
|
|
"grad_norm": 14.851744130736751,
|
|
"learning_rate": 2.3317120435073992e-06,
|
|
"loss": 0.5863408446311951,
|
|
"step": 4064
|
|
},
|
|
{
|
|
"epoch": 2.134978991596639,
|
|
"grad_norm": 8.659845983449085,
|
|
"learning_rate": 2.3291280606237186e-06,
|
|
"loss": 0.5293826460838318,
|
|
"step": 4065
|
|
},
|
|
{
|
|
"epoch": 2.1355042016806722,
|
|
"grad_norm": 11.008841469881911,
|
|
"learning_rate": 2.326545075501119e-06,
|
|
"loss": 0.4050210416316986,
|
|
"step": 4066
|
|
},
|
|
{
|
|
"epoch": 2.136029411764706,
|
|
"grad_norm": 12.752113588452234,
|
|
"learning_rate": 2.3239630891045255e-06,
|
|
"loss": 0.8473905920982361,
|
|
"step": 4067
|
|
},
|
|
{
|
|
"epoch": 2.1365546218487395,
|
|
"grad_norm": 11.676075587505613,
|
|
"learning_rate": 2.3213821023984994e-06,
|
|
"loss": 0.6905754804611206,
|
|
"step": 4068
|
|
},
|
|
{
|
|
"epoch": 2.1370798319327733,
|
|
"grad_norm": 6.487112493209409,
|
|
"learning_rate": 2.3188021163472206e-06,
|
|
"loss": 0.18758584558963776,
|
|
"step": 4069
|
|
},
|
|
{
|
|
"epoch": 2.1376050420168067,
|
|
"grad_norm": 8.909557989873372,
|
|
"learning_rate": 2.3162231319144984e-06,
|
|
"loss": 0.19892190396785736,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 2.1381302521008405,
|
|
"grad_norm": 8.227572511150989,
|
|
"learning_rate": 2.313645150063767e-06,
|
|
"loss": 0.3256494104862213,
|
|
"step": 4071
|
|
},
|
|
{
|
|
"epoch": 2.138655462184874,
|
|
"grad_norm": 14.821989068500018,
|
|
"learning_rate": 2.3110681717580856e-06,
|
|
"loss": 0.4866800308227539,
|
|
"step": 4072
|
|
},
|
|
{
|
|
"epoch": 2.1391806722689077,
|
|
"grad_norm": 7.70065568985152,
|
|
"learning_rate": 2.308492197960141e-06,
|
|
"loss": 0.3043510913848877,
|
|
"step": 4073
|
|
},
|
|
{
|
|
"epoch": 2.139705882352941,
|
|
"grad_norm": 22.49326411043284,
|
|
"learning_rate": 2.30591722963224e-06,
|
|
"loss": 0.5373920202255249,
|
|
"step": 4074
|
|
},
|
|
{
|
|
"epoch": 2.140231092436975,
|
|
"grad_norm": 9.318984538849298,
|
|
"learning_rate": 2.303343267736321e-06,
|
|
"loss": 0.2898182272911072,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 2.1407563025210083,
|
|
"grad_norm": 15.944258917000777,
|
|
"learning_rate": 2.3007703132339406e-06,
|
|
"loss": 0.4407224953174591,
|
|
"step": 4076
|
|
},
|
|
{
|
|
"epoch": 2.141281512605042,
|
|
"grad_norm": 9.669606729200078,
|
|
"learning_rate": 2.2981983670862796e-06,
|
|
"loss": 0.43881750106811523,
|
|
"step": 4077
|
|
},
|
|
{
|
|
"epoch": 2.1418067226890756,
|
|
"grad_norm": 9.714564211649812,
|
|
"learning_rate": 2.295627430254145e-06,
|
|
"loss": 0.23658540844917297,
|
|
"step": 4078
|
|
},
|
|
{
|
|
"epoch": 2.1423319327731094,
|
|
"grad_norm": 10.424042513988447,
|
|
"learning_rate": 2.2930575036979634e-06,
|
|
"loss": 0.676588237285614,
|
|
"step": 4079
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 10.492214268235987,
|
|
"learning_rate": 2.2904885883777865e-06,
|
|
"loss": 0.4801306128501892,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 2.1433823529411766,
|
|
"grad_norm": 13.9587215633948,
|
|
"learning_rate": 2.2879206852532854e-06,
|
|
"loss": 0.2784840166568756,
|
|
"step": 4081
|
|
},
|
|
{
|
|
"epoch": 2.14390756302521,
|
|
"grad_norm": 10.142025722393312,
|
|
"learning_rate": 2.2853537952837577e-06,
|
|
"loss": 0.35729077458381653,
|
|
"step": 4082
|
|
},
|
|
{
|
|
"epoch": 2.144432773109244,
|
|
"grad_norm": 9.805524248752027,
|
|
"learning_rate": 2.2827879194281196e-06,
|
|
"loss": 0.30716753005981445,
|
|
"step": 4083
|
|
},
|
|
{
|
|
"epoch": 2.1449579831932772,
|
|
"grad_norm": 12.828334694590527,
|
|
"learning_rate": 2.2802230586449074e-06,
|
|
"loss": 0.6840190887451172,
|
|
"step": 4084
|
|
},
|
|
{
|
|
"epoch": 2.145483193277311,
|
|
"grad_norm": 14.622043020032047,
|
|
"learning_rate": 2.2776592138922806e-06,
|
|
"loss": 0.46795228123664856,
|
|
"step": 4085
|
|
},
|
|
{
|
|
"epoch": 2.1460084033613445,
|
|
"grad_norm": 11.523832735496942,
|
|
"learning_rate": 2.275096386128016e-06,
|
|
"loss": 0.42503461241722107,
|
|
"step": 4086
|
|
},
|
|
{
|
|
"epoch": 2.1465336134453783,
|
|
"grad_norm": 10.337690850053468,
|
|
"learning_rate": 2.2725345763095146e-06,
|
|
"loss": 0.6719300150871277,
|
|
"step": 4087
|
|
},
|
|
{
|
|
"epoch": 2.1470588235294117,
|
|
"grad_norm": 10.17049504323966,
|
|
"learning_rate": 2.269973785393794e-06,
|
|
"loss": 0.48671677708625793,
|
|
"step": 4088
|
|
},
|
|
{
|
|
"epoch": 2.1475840336134455,
|
|
"grad_norm": 11.769018661108722,
|
|
"learning_rate": 2.2674140143374904e-06,
|
|
"loss": 0.46760737895965576,
|
|
"step": 4089
|
|
},
|
|
{
|
|
"epoch": 2.148109243697479,
|
|
"grad_norm": 12.905616796952135,
|
|
"learning_rate": 2.2648552640968646e-06,
|
|
"loss": 0.5618797540664673,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 2.1486344537815127,
|
|
"grad_norm": 10.39339343948446,
|
|
"learning_rate": 2.2622975356277903e-06,
|
|
"loss": 0.36223387718200684,
|
|
"step": 4091
|
|
},
|
|
{
|
|
"epoch": 2.149159663865546,
|
|
"grad_norm": 9.077655547989146,
|
|
"learning_rate": 2.2597408298857613e-06,
|
|
"loss": 0.3210272192955017,
|
|
"step": 4092
|
|
},
|
|
{
|
|
"epoch": 2.14968487394958,
|
|
"grad_norm": 12.806312458390245,
|
|
"learning_rate": 2.2571851478258903e-06,
|
|
"loss": 0.33161550760269165,
|
|
"step": 4093
|
|
},
|
|
{
|
|
"epoch": 2.1502100840336134,
|
|
"grad_norm": 9.361168044152002,
|
|
"learning_rate": 2.2546304904029055e-06,
|
|
"loss": 0.3387778699398041,
|
|
"step": 4094
|
|
},
|
|
{
|
|
"epoch": 2.150735294117647,
|
|
"grad_norm": 12.34021327672009,
|
|
"learning_rate": 2.2520768585711544e-06,
|
|
"loss": 0.2693478465080261,
|
|
"step": 4095
|
|
},
|
|
{
|
|
"epoch": 2.1512605042016806,
|
|
"grad_norm": 14.200250639417026,
|
|
"learning_rate": 2.249524253284598e-06,
|
|
"loss": 1.022866129875183,
|
|
"step": 4096
|
|
},
|
|
{
|
|
"epoch": 2.1517857142857144,
|
|
"grad_norm": 15.085975732395568,
|
|
"learning_rate": 2.2469726754968207e-06,
|
|
"loss": 0.6444025039672852,
|
|
"step": 4097
|
|
},
|
|
{
|
|
"epoch": 2.152310924369748,
|
|
"grad_norm": 19.904778098906103,
|
|
"learning_rate": 2.244422126161017e-06,
|
|
"loss": 0.5390111804008484,
|
|
"step": 4098
|
|
},
|
|
{
|
|
"epoch": 2.1528361344537816,
|
|
"grad_norm": 9.404188201411367,
|
|
"learning_rate": 2.241872606229998e-06,
|
|
"loss": 0.6100510358810425,
|
|
"step": 4099
|
|
},
|
|
{
|
|
"epoch": 2.153361344537815,
|
|
"grad_norm": 12.534920979878125,
|
|
"learning_rate": 2.239324116656192e-06,
|
|
"loss": 0.20052313804626465,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 2.153886554621849,
|
|
"grad_norm": 7.030972536094845,
|
|
"learning_rate": 2.236776658391641e-06,
|
|
"loss": 0.27668434381484985,
|
|
"step": 4101
|
|
},
|
|
{
|
|
"epoch": 2.1544117647058822,
|
|
"grad_norm": 17.569630042939462,
|
|
"learning_rate": 2.2342302323880026e-06,
|
|
"loss": 0.413688987493515,
|
|
"step": 4102
|
|
},
|
|
{
|
|
"epoch": 2.154936974789916,
|
|
"grad_norm": 13.881738834149553,
|
|
"learning_rate": 2.2316848395965483e-06,
|
|
"loss": 0.5751949548721313,
|
|
"step": 4103
|
|
},
|
|
{
|
|
"epoch": 2.1554621848739495,
|
|
"grad_norm": 16.076818408765543,
|
|
"learning_rate": 2.2291404809681627e-06,
|
|
"loss": 0.9522318243980408,
|
|
"step": 4104
|
|
},
|
|
{
|
|
"epoch": 2.1559873949579833,
|
|
"grad_norm": 13.406280337945816,
|
|
"learning_rate": 2.2265971574533474e-06,
|
|
"loss": 0.40907883644104004,
|
|
"step": 4105
|
|
},
|
|
{
|
|
"epoch": 2.1565126050420167,
|
|
"grad_norm": 11.330533682546829,
|
|
"learning_rate": 2.224054870002214e-06,
|
|
"loss": 0.6507560014724731,
|
|
"step": 4106
|
|
},
|
|
{
|
|
"epoch": 2.1570378151260505,
|
|
"grad_norm": 13.994750393715663,
|
|
"learning_rate": 2.2215136195644884e-06,
|
|
"loss": 0.541474461555481,
|
|
"step": 4107
|
|
},
|
|
{
|
|
"epoch": 2.157563025210084,
|
|
"grad_norm": 11.129845575276443,
|
|
"learning_rate": 2.2189734070895086e-06,
|
|
"loss": 0.3323579728603363,
|
|
"step": 4108
|
|
},
|
|
{
|
|
"epoch": 2.1580882352941178,
|
|
"grad_norm": 13.908236890450672,
|
|
"learning_rate": 2.2164342335262244e-06,
|
|
"loss": 0.2566412091255188,
|
|
"step": 4109
|
|
},
|
|
{
|
|
"epoch": 2.158613445378151,
|
|
"grad_norm": 9.421555202556819,
|
|
"learning_rate": 2.2138960998231983e-06,
|
|
"loss": 0.34640175104141235,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 2.159138655462185,
|
|
"grad_norm": 15.501626790134464,
|
|
"learning_rate": 2.2113590069286033e-06,
|
|
"loss": 0.5923685431480408,
|
|
"step": 4111
|
|
},
|
|
{
|
|
"epoch": 2.1596638655462184,
|
|
"grad_norm": 8.407652272330782,
|
|
"learning_rate": 2.208822955790228e-06,
|
|
"loss": 0.35512563586235046,
|
|
"step": 4112
|
|
},
|
|
{
|
|
"epoch": 2.160189075630252,
|
|
"grad_norm": 13.002258015880964,
|
|
"learning_rate": 2.2062879473554654e-06,
|
|
"loss": 0.5846668481826782,
|
|
"step": 4113
|
|
},
|
|
{
|
|
"epoch": 2.1607142857142856,
|
|
"grad_norm": 13.773114956142704,
|
|
"learning_rate": 2.203753982571322e-06,
|
|
"loss": 0.6553114652633667,
|
|
"step": 4114
|
|
},
|
|
{
|
|
"epoch": 2.1612394957983194,
|
|
"grad_norm": 9.131268909989592,
|
|
"learning_rate": 2.2012210623844155e-06,
|
|
"loss": 0.5892950296401978,
|
|
"step": 4115
|
|
},
|
|
{
|
|
"epoch": 2.161764705882353,
|
|
"grad_norm": 8.048857099303655,
|
|
"learning_rate": 2.198689187740972e-06,
|
|
"loss": 0.46284598112106323,
|
|
"step": 4116
|
|
},
|
|
{
|
|
"epoch": 2.1622899159663866,
|
|
"grad_norm": 11.92598926356154,
|
|
"learning_rate": 2.1961583595868253e-06,
|
|
"loss": 0.3618340790271759,
|
|
"step": 4117
|
|
},
|
|
{
|
|
"epoch": 2.16281512605042,
|
|
"grad_norm": 10.369574199628978,
|
|
"learning_rate": 2.1936285788674204e-06,
|
|
"loss": 0.34268003702163696,
|
|
"step": 4118
|
|
},
|
|
{
|
|
"epoch": 2.163340336134454,
|
|
"grad_norm": 9.203339303925619,
|
|
"learning_rate": 2.191099846527813e-06,
|
|
"loss": 0.27453872561454773,
|
|
"step": 4119
|
|
},
|
|
{
|
|
"epoch": 2.1638655462184873,
|
|
"grad_norm": 15.470084126350917,
|
|
"learning_rate": 2.1885721635126646e-06,
|
|
"loss": 0.5603208541870117,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 2.164390756302521,
|
|
"grad_norm": 16.989271796983516,
|
|
"learning_rate": 2.186045530766244e-06,
|
|
"loss": 1.4984362125396729,
|
|
"step": 4121
|
|
},
|
|
{
|
|
"epoch": 2.1649159663865545,
|
|
"grad_norm": 9.931917013371637,
|
|
"learning_rate": 2.183519949232428e-06,
|
|
"loss": 0.5418696403503418,
|
|
"step": 4122
|
|
},
|
|
{
|
|
"epoch": 2.1654411764705883,
|
|
"grad_norm": 10.138879609496065,
|
|
"learning_rate": 2.180995419854703e-06,
|
|
"loss": 0.2958468198776245,
|
|
"step": 4123
|
|
},
|
|
{
|
|
"epoch": 2.1659663865546217,
|
|
"grad_norm": 11.93201177102272,
|
|
"learning_rate": 2.17847194357616e-06,
|
|
"loss": 0.2131626009941101,
|
|
"step": 4124
|
|
},
|
|
{
|
|
"epoch": 2.1664915966386555,
|
|
"grad_norm": 14.206516666505092,
|
|
"learning_rate": 2.1759495213394965e-06,
|
|
"loss": 1.0808008909225464,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 2.167016806722689,
|
|
"grad_norm": 9.416428725696495,
|
|
"learning_rate": 2.1734281540870158e-06,
|
|
"loss": 0.390267014503479,
|
|
"step": 4126
|
|
},
|
|
{
|
|
"epoch": 2.1675420168067228,
|
|
"grad_norm": 8.661698657424344,
|
|
"learning_rate": 2.1709078427606323e-06,
|
|
"loss": 0.29766106605529785,
|
|
"step": 4127
|
|
},
|
|
{
|
|
"epoch": 2.168067226890756,
|
|
"grad_norm": 11.100999631680564,
|
|
"learning_rate": 2.1683885883018596e-06,
|
|
"loss": 0.6078053116798401,
|
|
"step": 4128
|
|
},
|
|
{
|
|
"epoch": 2.16859243697479,
|
|
"grad_norm": 7.787858276858203,
|
|
"learning_rate": 2.165870391651819e-06,
|
|
"loss": 0.23034285008907318,
|
|
"step": 4129
|
|
},
|
|
{
|
|
"epoch": 2.1691176470588234,
|
|
"grad_norm": 7.997285454490383,
|
|
"learning_rate": 2.1633532537512374e-06,
|
|
"loss": 0.2559571862220764,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 2.169642857142857,
|
|
"grad_norm": 11.804350834609446,
|
|
"learning_rate": 2.160837175540444e-06,
|
|
"loss": 0.7174216508865356,
|
|
"step": 4131
|
|
},
|
|
{
|
|
"epoch": 2.1701680672268906,
|
|
"grad_norm": 13.936453134967207,
|
|
"learning_rate": 2.158322157959373e-06,
|
|
"loss": 0.2922583222389221,
|
|
"step": 4132
|
|
},
|
|
{
|
|
"epoch": 2.1706932773109244,
|
|
"grad_norm": 10.251391658667258,
|
|
"learning_rate": 2.155808201947563e-06,
|
|
"loss": 0.829541802406311,
|
|
"step": 4133
|
|
},
|
|
{
|
|
"epoch": 2.171218487394958,
|
|
"grad_norm": 10.23807325621361,
|
|
"learning_rate": 2.1532953084441575e-06,
|
|
"loss": 1.2362592220306396,
|
|
"step": 4134
|
|
},
|
|
{
|
|
"epoch": 2.1717436974789917,
|
|
"grad_norm": 10.838998585342512,
|
|
"learning_rate": 2.1507834783879007e-06,
|
|
"loss": 0.530997097492218,
|
|
"step": 4135
|
|
},
|
|
{
|
|
"epoch": 2.172268907563025,
|
|
"grad_norm": 8.553686636875732,
|
|
"learning_rate": 2.1482727127171395e-06,
|
|
"loss": 0.3646453022956848,
|
|
"step": 4136
|
|
},
|
|
{
|
|
"epoch": 2.172794117647059,
|
|
"grad_norm": 11.613822231051532,
|
|
"learning_rate": 2.145763012369824e-06,
|
|
"loss": 0.7255064249038696,
|
|
"step": 4137
|
|
},
|
|
{
|
|
"epoch": 2.1733193277310923,
|
|
"grad_norm": 11.098132735933739,
|
|
"learning_rate": 2.1432543782835064e-06,
|
|
"loss": 0.7276588082313538,
|
|
"step": 4138
|
|
},
|
|
{
|
|
"epoch": 2.173844537815126,
|
|
"grad_norm": 25.505741451033256,
|
|
"learning_rate": 2.14074681139534e-06,
|
|
"loss": 0.5854189991950989,
|
|
"step": 4139
|
|
},
|
|
{
|
|
"epoch": 2.1743697478991595,
|
|
"grad_norm": 7.537161132561563,
|
|
"learning_rate": 2.1382403126420804e-06,
|
|
"loss": 0.7636845111846924,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 2.1748949579831933,
|
|
"grad_norm": 10.668134187294704,
|
|
"learning_rate": 2.1357348829600816e-06,
|
|
"loss": 0.6803666949272156,
|
|
"step": 4141
|
|
},
|
|
{
|
|
"epoch": 2.1754201680672267,
|
|
"grad_norm": 5.653673343197226,
|
|
"learning_rate": 2.1332305232853036e-06,
|
|
"loss": 0.2199927717447281,
|
|
"step": 4142
|
|
},
|
|
{
|
|
"epoch": 2.1759453781512605,
|
|
"grad_norm": 12.552287838561119,
|
|
"learning_rate": 2.130727234553301e-06,
|
|
"loss": 0.34859079122543335,
|
|
"step": 4143
|
|
},
|
|
{
|
|
"epoch": 2.176470588235294,
|
|
"grad_norm": 9.069673498422354,
|
|
"learning_rate": 2.128225017699232e-06,
|
|
"loss": 0.36638444662094116,
|
|
"step": 4144
|
|
},
|
|
{
|
|
"epoch": 2.1769957983193278,
|
|
"grad_norm": 22.65302641357171,
|
|
"learning_rate": 2.125723873657852e-06,
|
|
"loss": 0.9880443811416626,
|
|
"step": 4145
|
|
},
|
|
{
|
|
"epoch": 2.177521008403361,
|
|
"grad_norm": 12.297120435804487,
|
|
"learning_rate": 2.123223803363516e-06,
|
|
"loss": 0.49851250648498535,
|
|
"step": 4146
|
|
},
|
|
{
|
|
"epoch": 2.178046218487395,
|
|
"grad_norm": 31.083247431672014,
|
|
"learning_rate": 2.1207248077501796e-06,
|
|
"loss": 0.7122225165367126,
|
|
"step": 4147
|
|
},
|
|
{
|
|
"epoch": 2.1785714285714284,
|
|
"grad_norm": 18.436865021595267,
|
|
"learning_rate": 2.118226887751394e-06,
|
|
"loss": 0.505310595035553,
|
|
"step": 4148
|
|
},
|
|
{
|
|
"epoch": 2.179096638655462,
|
|
"grad_norm": 12.294063749377852,
|
|
"learning_rate": 2.115730044300313e-06,
|
|
"loss": 0.4264427721500397,
|
|
"step": 4149
|
|
},
|
|
{
|
|
"epoch": 2.1796218487394956,
|
|
"grad_norm": 21.35135405565712,
|
|
"learning_rate": 2.113234278329685e-06,
|
|
"loss": 0.2829148769378662,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 2.1801470588235294,
|
|
"grad_norm": 11.512959775518773,
|
|
"learning_rate": 2.110739590771856e-06,
|
|
"loss": 0.2545013427734375,
|
|
"step": 4151
|
|
},
|
|
{
|
|
"epoch": 2.180672268907563,
|
|
"grad_norm": 10.334224531936984,
|
|
"learning_rate": 2.10824598255877e-06,
|
|
"loss": 0.6813156008720398,
|
|
"step": 4152
|
|
},
|
|
{
|
|
"epoch": 2.1811974789915967,
|
|
"grad_norm": 12.117580299201101,
|
|
"learning_rate": 2.105753454621966e-06,
|
|
"loss": 0.5756422281265259,
|
|
"step": 4153
|
|
},
|
|
{
|
|
"epoch": 2.18172268907563,
|
|
"grad_norm": 10.307502170621538,
|
|
"learning_rate": 2.103262007892583e-06,
|
|
"loss": 0.3108716905117035,
|
|
"step": 4154
|
|
},
|
|
{
|
|
"epoch": 2.182247899159664,
|
|
"grad_norm": 9.379900244197216,
|
|
"learning_rate": 2.100771643301351e-06,
|
|
"loss": 0.3554327189922333,
|
|
"step": 4155
|
|
},
|
|
{
|
|
"epoch": 2.1827731092436973,
|
|
"grad_norm": 12.52549367725084,
|
|
"learning_rate": 2.0982823617786017e-06,
|
|
"loss": 1.1059948205947876,
|
|
"step": 4156
|
|
},
|
|
{
|
|
"epoch": 2.183298319327731,
|
|
"grad_norm": 9.56281993182365,
|
|
"learning_rate": 2.095794164254259e-06,
|
|
"loss": 0.36582398414611816,
|
|
"step": 4157
|
|
},
|
|
{
|
|
"epoch": 2.1838235294117645,
|
|
"grad_norm": 8.05420997514092,
|
|
"learning_rate": 2.0933070516578407e-06,
|
|
"loss": 0.3737989664077759,
|
|
"step": 4158
|
|
},
|
|
{
|
|
"epoch": 2.1843487394957983,
|
|
"grad_norm": 9.748343049586008,
|
|
"learning_rate": 2.090821024918462e-06,
|
|
"loss": 0.6534856557846069,
|
|
"step": 4159
|
|
},
|
|
{
|
|
"epoch": 2.184873949579832,
|
|
"grad_norm": 9.150306654086352,
|
|
"learning_rate": 2.0883360849648294e-06,
|
|
"loss": 0.5786707997322083,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 2.1853991596638656,
|
|
"grad_norm": 10.123184561076345,
|
|
"learning_rate": 2.0858522327252467e-06,
|
|
"loss": 0.5475946664810181,
|
|
"step": 4161
|
|
},
|
|
{
|
|
"epoch": 2.185924369747899,
|
|
"grad_norm": 9.814161672103774,
|
|
"learning_rate": 2.0833694691276093e-06,
|
|
"loss": 0.3300240933895111,
|
|
"step": 4162
|
|
},
|
|
{
|
|
"epoch": 2.1864495798319328,
|
|
"grad_norm": 12.620510761109097,
|
|
"learning_rate": 2.0808877950994037e-06,
|
|
"loss": 0.6013469696044922,
|
|
"step": 4163
|
|
},
|
|
{
|
|
"epoch": 2.1869747899159666,
|
|
"grad_norm": 9.001212339683478,
|
|
"learning_rate": 2.078407211567717e-06,
|
|
"loss": 0.5732584595680237,
|
|
"step": 4164
|
|
},
|
|
{
|
|
"epoch": 2.1875,
|
|
"grad_norm": 10.71379548020254,
|
|
"learning_rate": 2.0759277194592208e-06,
|
|
"loss": 0.30177998542785645,
|
|
"step": 4165
|
|
},
|
|
{
|
|
"epoch": 2.1880252100840334,
|
|
"grad_norm": 16.394340703005806,
|
|
"learning_rate": 2.073449319700184e-06,
|
|
"loss": 0.5181938409805298,
|
|
"step": 4166
|
|
},
|
|
{
|
|
"epoch": 2.1885504201680672,
|
|
"grad_norm": 10.618518841939864,
|
|
"learning_rate": 2.070972013216464e-06,
|
|
"loss": 0.3636229336261749,
|
|
"step": 4167
|
|
},
|
|
{
|
|
"epoch": 2.189075630252101,
|
|
"grad_norm": 9.175274377354294,
|
|
"learning_rate": 2.0684958009335122e-06,
|
|
"loss": 0.3279910087585449,
|
|
"step": 4168
|
|
},
|
|
{
|
|
"epoch": 2.1896008403361344,
|
|
"grad_norm": 17.577912285931166,
|
|
"learning_rate": 2.06602068377637e-06,
|
|
"loss": 0.3370909094810486,
|
|
"step": 4169
|
|
},
|
|
{
|
|
"epoch": 2.190126050420168,
|
|
"grad_norm": 12.503618346397172,
|
|
"learning_rate": 2.0635466626696688e-06,
|
|
"loss": 0.3615824580192566,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 2.1906512605042017,
|
|
"grad_norm": 16.875652296761626,
|
|
"learning_rate": 2.061073738537635e-06,
|
|
"loss": 0.5076624155044556,
|
|
"step": 4171
|
|
},
|
|
{
|
|
"epoch": 2.1911764705882355,
|
|
"grad_norm": 23.41322683525522,
|
|
"learning_rate": 2.05860191230408e-06,
|
|
"loss": 0.5695816278457642,
|
|
"step": 4172
|
|
},
|
|
{
|
|
"epoch": 2.191701680672269,
|
|
"grad_norm": 8.037363880378786,
|
|
"learning_rate": 2.0561311848924082e-06,
|
|
"loss": 0.5932033061981201,
|
|
"step": 4173
|
|
},
|
|
{
|
|
"epoch": 2.1922268907563027,
|
|
"grad_norm": 9.483592587045889,
|
|
"learning_rate": 2.053661557225611e-06,
|
|
"loss": 0.5292115211486816,
|
|
"step": 4174
|
|
},
|
|
{
|
|
"epoch": 2.192752100840336,
|
|
"grad_norm": 15.771638520172871,
|
|
"learning_rate": 2.0511930302262724e-06,
|
|
"loss": 0.8906204700469971,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 2.19327731092437,
|
|
"grad_norm": 6.953253941111415,
|
|
"learning_rate": 2.048725604816561e-06,
|
|
"loss": 0.506250262260437,
|
|
"step": 4176
|
|
},
|
|
{
|
|
"epoch": 2.1938025210084033,
|
|
"grad_norm": 7.579622931622611,
|
|
"learning_rate": 2.0462592819182377e-06,
|
|
"loss": 0.5802386999130249,
|
|
"step": 4177
|
|
},
|
|
{
|
|
"epoch": 2.194327731092437,
|
|
"grad_norm": 12.317691620413207,
|
|
"learning_rate": 2.043794062452647e-06,
|
|
"loss": 0.6706414222717285,
|
|
"step": 4178
|
|
},
|
|
{
|
|
"epoch": 2.1948529411764706,
|
|
"grad_norm": 9.34852734409619,
|
|
"learning_rate": 2.0413299473407285e-06,
|
|
"loss": 0.6208543181419373,
|
|
"step": 4179
|
|
},
|
|
{
|
|
"epoch": 2.1953781512605044,
|
|
"grad_norm": 9.086832000084609,
|
|
"learning_rate": 2.0388669375030024e-06,
|
|
"loss": 0.3323096036911011,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 2.195903361344538,
|
|
"grad_norm": 11.767961058541344,
|
|
"learning_rate": 2.0364050338595792e-06,
|
|
"loss": 0.5645102262496948,
|
|
"step": 4181
|
|
},
|
|
{
|
|
"epoch": 2.1964285714285716,
|
|
"grad_norm": 18.284140518390636,
|
|
"learning_rate": 2.0339442373301548e-06,
|
|
"loss": 0.8418555855751038,
|
|
"step": 4182
|
|
},
|
|
{
|
|
"epoch": 2.196953781512605,
|
|
"grad_norm": 14.155789682869628,
|
|
"learning_rate": 2.031484548834013e-06,
|
|
"loss": 0.3883611559867859,
|
|
"step": 4183
|
|
},
|
|
{
|
|
"epoch": 2.197478991596639,
|
|
"grad_norm": 12.515288844892636,
|
|
"learning_rate": 2.0290259692900216e-06,
|
|
"loss": 0.6071314811706543,
|
|
"step": 4184
|
|
},
|
|
{
|
|
"epoch": 2.1980042016806722,
|
|
"grad_norm": 17.769203705565968,
|
|
"learning_rate": 2.0265684996166345e-06,
|
|
"loss": 1.4243782758712769,
|
|
"step": 4185
|
|
},
|
|
{
|
|
"epoch": 2.198529411764706,
|
|
"grad_norm": 15.035137568837966,
|
|
"learning_rate": 2.024112140731895e-06,
|
|
"loss": 0.4408267140388489,
|
|
"step": 4186
|
|
},
|
|
{
|
|
"epoch": 2.1990546218487395,
|
|
"grad_norm": 10.29686943582839,
|
|
"learning_rate": 2.021656893553427e-06,
|
|
"loss": 0.5268247723579407,
|
|
"step": 4187
|
|
},
|
|
{
|
|
"epoch": 2.1995798319327733,
|
|
"grad_norm": 11.977760038537417,
|
|
"learning_rate": 2.0192027589984377e-06,
|
|
"loss": 0.49362415075302124,
|
|
"step": 4188
|
|
},
|
|
{
|
|
"epoch": 2.2001050420168067,
|
|
"grad_norm": 9.706659921209024,
|
|
"learning_rate": 2.0167497379837254e-06,
|
|
"loss": 0.27251413464546204,
|
|
"step": 4189
|
|
},
|
|
{
|
|
"epoch": 2.2006302521008405,
|
|
"grad_norm": 22.27753361545864,
|
|
"learning_rate": 2.014297831425666e-06,
|
|
"loss": 0.7175557017326355,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 2.201155462184874,
|
|
"grad_norm": 14.991979543110716,
|
|
"learning_rate": 2.0118470402402223e-06,
|
|
"loss": 0.33612674474716187,
|
|
"step": 4191
|
|
},
|
|
{
|
|
"epoch": 2.2016806722689077,
|
|
"grad_norm": 9.789047134908518,
|
|
"learning_rate": 2.009397365342939e-06,
|
|
"loss": 0.27348434925079346,
|
|
"step": 4192
|
|
},
|
|
{
|
|
"epoch": 2.202205882352941,
|
|
"grad_norm": 10.804283172794818,
|
|
"learning_rate": 2.0069488076489445e-06,
|
|
"loss": 0.19717592000961304,
|
|
"step": 4193
|
|
},
|
|
{
|
|
"epoch": 2.202731092436975,
|
|
"grad_norm": 11.918979990452257,
|
|
"learning_rate": 2.0045013680729477e-06,
|
|
"loss": 0.4407789707183838,
|
|
"step": 4194
|
|
},
|
|
{
|
|
"epoch": 2.2032563025210083,
|
|
"grad_norm": 9.358364885066807,
|
|
"learning_rate": 2.0020550475292456e-06,
|
|
"loss": 0.369744211435318,
|
|
"step": 4195
|
|
},
|
|
{
|
|
"epoch": 2.203781512605042,
|
|
"grad_norm": 10.05960294577804,
|
|
"learning_rate": 1.999609846931711e-06,
|
|
"loss": 0.6133785247802734,
|
|
"step": 4196
|
|
},
|
|
{
|
|
"epoch": 2.2043067226890756,
|
|
"grad_norm": 13.33911712392511,
|
|
"learning_rate": 1.997165767193801e-06,
|
|
"loss": 0.5213902592658997,
|
|
"step": 4197
|
|
},
|
|
{
|
|
"epoch": 2.2048319327731094,
|
|
"grad_norm": 19.503275644843303,
|
|
"learning_rate": 1.994722809228554e-06,
|
|
"loss": 0.9305657148361206,
|
|
"step": 4198
|
|
},
|
|
{
|
|
"epoch": 2.205357142857143,
|
|
"grad_norm": 9.868231682672182,
|
|
"learning_rate": 1.9922809739485883e-06,
|
|
"loss": 0.7982741594314575,
|
|
"step": 4199
|
|
},
|
|
{
|
|
"epoch": 2.2058823529411766,
|
|
"grad_norm": 10.228297500346645,
|
|
"learning_rate": 1.9898402622661036e-06,
|
|
"loss": 0.7370745539665222,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 2.20640756302521,
|
|
"grad_norm": 20.771341640684224,
|
|
"learning_rate": 1.9874006750928783e-06,
|
|
"loss": 0.5306645035743713,
|
|
"step": 4201
|
|
},
|
|
{
|
|
"epoch": 2.206932773109244,
|
|
"grad_norm": 9.833383007286246,
|
|
"learning_rate": 1.9849622133402753e-06,
|
|
"loss": 0.37817084789276123,
|
|
"step": 4202
|
|
},
|
|
{
|
|
"epoch": 2.2074579831932772,
|
|
"grad_norm": 9.19511118204492,
|
|
"learning_rate": 1.9825248779192323e-06,
|
|
"loss": 0.6270235776901245,
|
|
"step": 4203
|
|
},
|
|
{
|
|
"epoch": 2.207983193277311,
|
|
"grad_norm": 12.791260823791497,
|
|
"learning_rate": 1.9800886697402684e-06,
|
|
"loss": 1.0493865013122559,
|
|
"step": 4204
|
|
},
|
|
{
|
|
"epoch": 2.2085084033613445,
|
|
"grad_norm": 9.811316938051316,
|
|
"learning_rate": 1.97765358971348e-06,
|
|
"loss": 0.7131880521774292,
|
|
"step": 4205
|
|
},
|
|
{
|
|
"epoch": 2.2090336134453783,
|
|
"grad_norm": 14.093480674583617,
|
|
"learning_rate": 1.9752196387485434e-06,
|
|
"loss": 0.5840585231781006,
|
|
"step": 4206
|
|
},
|
|
{
|
|
"epoch": 2.2095588235294117,
|
|
"grad_norm": 7.911030532639583,
|
|
"learning_rate": 1.9727868177547126e-06,
|
|
"loss": 0.33531811833381653,
|
|
"step": 4207
|
|
},
|
|
{
|
|
"epoch": 2.2100840336134455,
|
|
"grad_norm": 15.014794444530551,
|
|
"learning_rate": 1.9703551276408204e-06,
|
|
"loss": 0.7113238573074341,
|
|
"step": 4208
|
|
},
|
|
{
|
|
"epoch": 2.210609243697479,
|
|
"grad_norm": 12.543505964644165,
|
|
"learning_rate": 1.967924569315275e-06,
|
|
"loss": 0.45716071128845215,
|
|
"step": 4209
|
|
},
|
|
{
|
|
"epoch": 2.2111344537815127,
|
|
"grad_norm": 8.88840872635617,
|
|
"learning_rate": 1.9654951436860653e-06,
|
|
"loss": 0.5780990123748779,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 2.211659663865546,
|
|
"grad_norm": 8.302707224640224,
|
|
"learning_rate": 1.9630668516607543e-06,
|
|
"loss": 0.43058717250823975,
|
|
"step": 4211
|
|
},
|
|
{
|
|
"epoch": 2.21218487394958,
|
|
"grad_norm": 11.32072871138526,
|
|
"learning_rate": 1.9606396941464818e-06,
|
|
"loss": 0.2899579405784607,
|
|
"step": 4212
|
|
},
|
|
{
|
|
"epoch": 2.2127100840336134,
|
|
"grad_norm": 12.49695712166815,
|
|
"learning_rate": 1.958213672049964e-06,
|
|
"loss": 0.9015559554100037,
|
|
"step": 4213
|
|
},
|
|
{
|
|
"epoch": 2.213235294117647,
|
|
"grad_norm": 10.767827231610696,
|
|
"learning_rate": 1.9557887862774932e-06,
|
|
"loss": 0.6473977565765381,
|
|
"step": 4214
|
|
},
|
|
{
|
|
"epoch": 2.2137605042016806,
|
|
"grad_norm": 9.096515865985335,
|
|
"learning_rate": 1.9533650377349374e-06,
|
|
"loss": 0.27973464131355286,
|
|
"step": 4215
|
|
},
|
|
{
|
|
"epoch": 2.2142857142857144,
|
|
"grad_norm": 12.690723019200991,
|
|
"learning_rate": 1.950942427327737e-06,
|
|
"loss": 0.7399759292602539,
|
|
"step": 4216
|
|
},
|
|
{
|
|
"epoch": 2.214810924369748,
|
|
"grad_norm": 14.477758723294201,
|
|
"learning_rate": 1.9485209559609148e-06,
|
|
"loss": 0.24219633638858795,
|
|
"step": 4217
|
|
},
|
|
{
|
|
"epoch": 2.2153361344537816,
|
|
"grad_norm": 7.656369932489473,
|
|
"learning_rate": 1.9461006245390594e-06,
|
|
"loss": 0.14525285363197327,
|
|
"step": 4218
|
|
},
|
|
{
|
|
"epoch": 2.215861344537815,
|
|
"grad_norm": 8.932338220927106,
|
|
"learning_rate": 1.943681433966338e-06,
|
|
"loss": 0.3175009489059448,
|
|
"step": 4219
|
|
},
|
|
{
|
|
"epoch": 2.216386554621849,
|
|
"grad_norm": 7.723831645123156,
|
|
"learning_rate": 1.9412633851464905e-06,
|
|
"loss": 0.41556933522224426,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 2.2169117647058822,
|
|
"grad_norm": 17.498494880239466,
|
|
"learning_rate": 1.9388464789828316e-06,
|
|
"loss": 0.7165228128433228,
|
|
"step": 4221
|
|
},
|
|
{
|
|
"epoch": 2.217436974789916,
|
|
"grad_norm": 10.458573195068976,
|
|
"learning_rate": 1.9364307163782466e-06,
|
|
"loss": 0.4890228509902954,
|
|
"step": 4222
|
|
},
|
|
{
|
|
"epoch": 2.2179621848739495,
|
|
"grad_norm": 11.304200533087775,
|
|
"learning_rate": 1.9340160982351937e-06,
|
|
"loss": 0.6948677897453308,
|
|
"step": 4223
|
|
},
|
|
{
|
|
"epoch": 2.2184873949579833,
|
|
"grad_norm": 11.930371566472163,
|
|
"learning_rate": 1.9316026254557083e-06,
|
|
"loss": 0.372274249792099,
|
|
"step": 4224
|
|
},
|
|
{
|
|
"epoch": 2.2190126050420167,
|
|
"grad_norm": 9.730098144466242,
|
|
"learning_rate": 1.9291902989413935e-06,
|
|
"loss": 0.3224791884422302,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 2.2195378151260505,
|
|
"grad_norm": 28.299366470610863,
|
|
"learning_rate": 1.926779119593424e-06,
|
|
"loss": 1.8382922410964966,
|
|
"step": 4226
|
|
},
|
|
{
|
|
"epoch": 2.220063025210084,
|
|
"grad_norm": 23.461126414762614,
|
|
"learning_rate": 1.9243690883125495e-06,
|
|
"loss": 0.3881704807281494,
|
|
"step": 4227
|
|
},
|
|
{
|
|
"epoch": 2.2205882352941178,
|
|
"grad_norm": 11.216692612225211,
|
|
"learning_rate": 1.9219602059990855e-06,
|
|
"loss": 1.0345499515533447,
|
|
"step": 4228
|
|
},
|
|
{
|
|
"epoch": 2.221113445378151,
|
|
"grad_norm": 12.991118298538602,
|
|
"learning_rate": 1.9195524735529237e-06,
|
|
"loss": 0.2979498505592346,
|
|
"step": 4229
|
|
},
|
|
{
|
|
"epoch": 2.221638655462185,
|
|
"grad_norm": 12.497677517668397,
|
|
"learning_rate": 1.917145891873522e-06,
|
|
"loss": 0.293893426656723,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 2.2221638655462184,
|
|
"grad_norm": 10.544509646480991,
|
|
"learning_rate": 1.91474046185991e-06,
|
|
"loss": 0.34756922721862793,
|
|
"step": 4231
|
|
},
|
|
{
|
|
"epoch": 2.222689075630252,
|
|
"grad_norm": 14.556717169054096,
|
|
"learning_rate": 1.9123361844106897e-06,
|
|
"loss": 0.5735061168670654,
|
|
"step": 4232
|
|
},
|
|
{
|
|
"epoch": 2.2232142857142856,
|
|
"grad_norm": 11.593487925413427,
|
|
"learning_rate": 1.909933060424029e-06,
|
|
"loss": 0.4957185387611389,
|
|
"step": 4233
|
|
},
|
|
{
|
|
"epoch": 2.2237394957983194,
|
|
"grad_norm": 14.976538125462485,
|
|
"learning_rate": 1.9075310907976665e-06,
|
|
"loss": 0.5994662046432495,
|
|
"step": 4234
|
|
},
|
|
{
|
|
"epoch": 2.224264705882353,
|
|
"grad_norm": 15.837535116480117,
|
|
"learning_rate": 1.9051302764289075e-06,
|
|
"loss": 0.3711355924606323,
|
|
"step": 4235
|
|
},
|
|
{
|
|
"epoch": 2.2247899159663866,
|
|
"grad_norm": 10.753597804787082,
|
|
"learning_rate": 1.9027306182146287e-06,
|
|
"loss": 0.7939506769180298,
|
|
"step": 4236
|
|
},
|
|
{
|
|
"epoch": 2.22531512605042,
|
|
"grad_norm": 12.043672496085208,
|
|
"learning_rate": 1.9003321170512728e-06,
|
|
"loss": 0.6818105578422546,
|
|
"step": 4237
|
|
},
|
|
{
|
|
"epoch": 2.225840336134454,
|
|
"grad_norm": 13.476801970533435,
|
|
"learning_rate": 1.8979347738348498e-06,
|
|
"loss": 0.4162781834602356,
|
|
"step": 4238
|
|
},
|
|
{
|
|
"epoch": 2.2263655462184873,
|
|
"grad_norm": 9.75946868817556,
|
|
"learning_rate": 1.8955385894609414e-06,
|
|
"loss": 0.40535199642181396,
|
|
"step": 4239
|
|
},
|
|
{
|
|
"epoch": 2.226890756302521,
|
|
"grad_norm": 12.048971044153054,
|
|
"learning_rate": 1.8931435648246916e-06,
|
|
"loss": 0.3999428451061249,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 2.2274159663865545,
|
|
"grad_norm": 8.310005672894276,
|
|
"learning_rate": 1.890749700820813e-06,
|
|
"loss": 0.3226167559623718,
|
|
"step": 4241
|
|
},
|
|
{
|
|
"epoch": 2.2279411764705883,
|
|
"grad_norm": 10.69961698436323,
|
|
"learning_rate": 1.8883569983435846e-06,
|
|
"loss": 0.7567530870437622,
|
|
"step": 4242
|
|
},
|
|
{
|
|
"epoch": 2.2284663865546217,
|
|
"grad_norm": 12.533105232512634,
|
|
"learning_rate": 1.8859654582868508e-06,
|
|
"loss": 0.6056888103485107,
|
|
"step": 4243
|
|
},
|
|
{
|
|
"epoch": 2.2289915966386555,
|
|
"grad_norm": 10.63696061089627,
|
|
"learning_rate": 1.8835750815440223e-06,
|
|
"loss": 0.967621386051178,
|
|
"step": 4244
|
|
},
|
|
{
|
|
"epoch": 2.229516806722689,
|
|
"grad_norm": 11.261256743839713,
|
|
"learning_rate": 1.8811858690080764e-06,
|
|
"loss": 0.5182070136070251,
|
|
"step": 4245
|
|
},
|
|
{
|
|
"epoch": 2.2300420168067228,
|
|
"grad_norm": 11.348583071333728,
|
|
"learning_rate": 1.8787978215715513e-06,
|
|
"loss": 0.5857005715370178,
|
|
"step": 4246
|
|
},
|
|
{
|
|
"epoch": 2.230567226890756,
|
|
"grad_norm": 11.668928252340686,
|
|
"learning_rate": 1.8764109401265567e-06,
|
|
"loss": 0.5006313920021057,
|
|
"step": 4247
|
|
},
|
|
{
|
|
"epoch": 2.23109243697479,
|
|
"grad_norm": 9.570223735853158,
|
|
"learning_rate": 1.8740252255647616e-06,
|
|
"loss": 0.9233022928237915,
|
|
"step": 4248
|
|
},
|
|
{
|
|
"epoch": 2.2316176470588234,
|
|
"grad_norm": 14.529213295709033,
|
|
"learning_rate": 1.8716406787774e-06,
|
|
"loss": 0.5809304118156433,
|
|
"step": 4249
|
|
},
|
|
{
|
|
"epoch": 2.232142857142857,
|
|
"grad_norm": 9.445726587664717,
|
|
"learning_rate": 1.8692573006552712e-06,
|
|
"loss": 0.34896859526634216,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 2.2326680672268906,
|
|
"grad_norm": 10.016974712584476,
|
|
"learning_rate": 1.866875092088735e-06,
|
|
"loss": 0.4887806177139282,
|
|
"step": 4251
|
|
},
|
|
{
|
|
"epoch": 2.2331932773109244,
|
|
"grad_norm": 22.837821621436888,
|
|
"learning_rate": 1.864494053967718e-06,
|
|
"loss": 1.0725637674331665,
|
|
"step": 4252
|
|
},
|
|
{
|
|
"epoch": 2.233718487394958,
|
|
"grad_norm": 10.952656545944258,
|
|
"learning_rate": 1.862114187181705e-06,
|
|
"loss": 0.6504073739051819,
|
|
"step": 4253
|
|
},
|
|
{
|
|
"epoch": 2.2342436974789917,
|
|
"grad_norm": 29.280790993162977,
|
|
"learning_rate": 1.8597354926197492e-06,
|
|
"loss": 2.4205195903778076,
|
|
"step": 4254
|
|
},
|
|
{
|
|
"epoch": 2.234768907563025,
|
|
"grad_norm": 12.430858344818144,
|
|
"learning_rate": 1.8573579711704615e-06,
|
|
"loss": 1.1163129806518555,
|
|
"step": 4255
|
|
},
|
|
{
|
|
"epoch": 2.235294117647059,
|
|
"grad_norm": 18.441813056922744,
|
|
"learning_rate": 1.8549816237220153e-06,
|
|
"loss": 0.4767477512359619,
|
|
"step": 4256
|
|
},
|
|
{
|
|
"epoch": 2.2358193277310923,
|
|
"grad_norm": 13.193167198146616,
|
|
"learning_rate": 1.8526064511621455e-06,
|
|
"loss": 0.45058536529541016,
|
|
"step": 4257
|
|
},
|
|
{
|
|
"epoch": 2.236344537815126,
|
|
"grad_norm": 11.797257836622302,
|
|
"learning_rate": 1.850232454378149e-06,
|
|
"loss": 0.4441848695278168,
|
|
"step": 4258
|
|
},
|
|
{
|
|
"epoch": 2.2368697478991595,
|
|
"grad_norm": 11.28502299388834,
|
|
"learning_rate": 1.8478596342568827e-06,
|
|
"loss": 0.2549547553062439,
|
|
"step": 4259
|
|
},
|
|
{
|
|
"epoch": 2.2373949579831933,
|
|
"grad_norm": 9.977389844623083,
|
|
"learning_rate": 1.8454879916847619e-06,
|
|
"loss": 0.5016900300979614,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 2.2379201680672267,
|
|
"grad_norm": 13.518901266693817,
|
|
"learning_rate": 1.843117527547768e-06,
|
|
"loss": 0.2857413589954376,
|
|
"step": 4261
|
|
},
|
|
{
|
|
"epoch": 2.2384453781512605,
|
|
"grad_norm": 10.638081977113426,
|
|
"learning_rate": 1.8407482427314366e-06,
|
|
"loss": 0.6297985911369324,
|
|
"step": 4262
|
|
},
|
|
{
|
|
"epoch": 2.238970588235294,
|
|
"grad_norm": 13.721575737834797,
|
|
"learning_rate": 1.8383801381208644e-06,
|
|
"loss": 0.5709312558174133,
|
|
"step": 4263
|
|
},
|
|
{
|
|
"epoch": 2.2394957983193278,
|
|
"grad_norm": 8.553145652845286,
|
|
"learning_rate": 1.8360132146007077e-06,
|
|
"loss": 0.36588770151138306,
|
|
"step": 4264
|
|
},
|
|
{
|
|
"epoch": 2.240021008403361,
|
|
"grad_norm": 9.595777530635301,
|
|
"learning_rate": 1.8336474730551807e-06,
|
|
"loss": 0.6568527817726135,
|
|
"step": 4265
|
|
},
|
|
{
|
|
"epoch": 2.240546218487395,
|
|
"grad_norm": 9.879017482166368,
|
|
"learning_rate": 1.8312829143680562e-06,
|
|
"loss": 0.3603191375732422,
|
|
"step": 4266
|
|
},
|
|
{
|
|
"epoch": 2.2410714285714284,
|
|
"grad_norm": 6.578677773465254,
|
|
"learning_rate": 1.828919539422666e-06,
|
|
"loss": 0.4119417667388916,
|
|
"step": 4267
|
|
},
|
|
{
|
|
"epoch": 2.241596638655462,
|
|
"grad_norm": 11.091435318143251,
|
|
"learning_rate": 1.8265573491018978e-06,
|
|
"loss": 0.6172628998756409,
|
|
"step": 4268
|
|
},
|
|
{
|
|
"epoch": 2.2421218487394956,
|
|
"grad_norm": 28.052879120220954,
|
|
"learning_rate": 1.8241963442882005e-06,
|
|
"loss": 0.5042892694473267,
|
|
"step": 4269
|
|
},
|
|
{
|
|
"epoch": 2.2426470588235294,
|
|
"grad_norm": 10.741007294378209,
|
|
"learning_rate": 1.8218365258635767e-06,
|
|
"loss": 0.8626440763473511,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 2.243172268907563,
|
|
"grad_norm": 8.08388356503716,
|
|
"learning_rate": 1.8194778947095866e-06,
|
|
"loss": 0.8154909014701843,
|
|
"step": 4271
|
|
},
|
|
{
|
|
"epoch": 2.2436974789915967,
|
|
"grad_norm": 15.921824884484678,
|
|
"learning_rate": 1.8171204517073472e-06,
|
|
"loss": 0.3045297861099243,
|
|
"step": 4272
|
|
},
|
|
{
|
|
"epoch": 2.24422268907563,
|
|
"grad_norm": 11.139114130473832,
|
|
"learning_rate": 1.8147641977375313e-06,
|
|
"loss": 0.18273624777793884,
|
|
"step": 4273
|
|
},
|
|
{
|
|
"epoch": 2.244747899159664,
|
|
"grad_norm": 12.013231223046976,
|
|
"learning_rate": 1.8124091336803684e-06,
|
|
"loss": 0.6754957437515259,
|
|
"step": 4274
|
|
},
|
|
{
|
|
"epoch": 2.2452731092436973,
|
|
"grad_norm": 11.867861241802702,
|
|
"learning_rate": 1.81005526041564e-06,
|
|
"loss": 0.8179190158843994,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 2.245798319327731,
|
|
"grad_norm": 16.47379659801115,
|
|
"learning_rate": 1.8077025788226898e-06,
|
|
"loss": 0.37153083086013794,
|
|
"step": 4276
|
|
},
|
|
{
|
|
"epoch": 2.2463235294117645,
|
|
"grad_norm": 17.569367020613644,
|
|
"learning_rate": 1.8053510897804105e-06,
|
|
"loss": 0.4909835457801819,
|
|
"step": 4277
|
|
},
|
|
{
|
|
"epoch": 2.2468487394957983,
|
|
"grad_norm": 13.35263942734109,
|
|
"learning_rate": 1.80300079416725e-06,
|
|
"loss": 0.47448334097862244,
|
|
"step": 4278
|
|
},
|
|
{
|
|
"epoch": 2.247373949579832,
|
|
"grad_norm": 9.428658728702898,
|
|
"learning_rate": 1.8006516928612121e-06,
|
|
"loss": 0.8697469234466553,
|
|
"step": 4279
|
|
},
|
|
{
|
|
"epoch": 2.2478991596638656,
|
|
"grad_norm": 9.434113994014082,
|
|
"learning_rate": 1.798303786739854e-06,
|
|
"loss": 0.44476377964019775,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 2.248424369747899,
|
|
"grad_norm": 13.118989511658329,
|
|
"learning_rate": 1.7959570766802847e-06,
|
|
"loss": 0.7967353463172913,
|
|
"step": 4281
|
|
},
|
|
{
|
|
"epoch": 2.2489495798319328,
|
|
"grad_norm": 13.719742825553485,
|
|
"learning_rate": 1.7936115635591684e-06,
|
|
"loss": 0.8126027584075928,
|
|
"step": 4282
|
|
},
|
|
{
|
|
"epoch": 2.2494747899159666,
|
|
"grad_norm": 7.911725621639151,
|
|
"learning_rate": 1.79126724825272e-06,
|
|
"loss": 0.3285817801952362,
|
|
"step": 4283
|
|
},
|
|
{
|
|
"epoch": 2.25,
|
|
"grad_norm": 8.437431585657757,
|
|
"learning_rate": 1.7889241316367112e-06,
|
|
"loss": 0.3654254078865051,
|
|
"step": 4284
|
|
},
|
|
{
|
|
"epoch": 2.2505252100840334,
|
|
"grad_norm": 6.008724218610319,
|
|
"learning_rate": 1.786582214586462e-06,
|
|
"loss": 0.3179757297039032,
|
|
"step": 4285
|
|
},
|
|
{
|
|
"epoch": 2.2510504201680672,
|
|
"grad_norm": 9.53388500267815,
|
|
"learning_rate": 1.7842414979768453e-06,
|
|
"loss": 0.5743989944458008,
|
|
"step": 4286
|
|
},
|
|
{
|
|
"epoch": 2.251575630252101,
|
|
"grad_norm": 7.160110305296361,
|
|
"learning_rate": 1.7819019826822853e-06,
|
|
"loss": 0.37717652320861816,
|
|
"step": 4287
|
|
},
|
|
{
|
|
"epoch": 2.2521008403361344,
|
|
"grad_norm": 9.154946871408706,
|
|
"learning_rate": 1.7795636695767582e-06,
|
|
"loss": 0.204721137881279,
|
|
"step": 4288
|
|
},
|
|
{
|
|
"epoch": 2.252626050420168,
|
|
"grad_norm": 14.832979058980008,
|
|
"learning_rate": 1.77722655953379e-06,
|
|
"loss": 0.6357308626174927,
|
|
"step": 4289
|
|
},
|
|
{
|
|
"epoch": 2.2531512605042017,
|
|
"grad_norm": 12.381302350771634,
|
|
"learning_rate": 1.7748906534264565e-06,
|
|
"loss": 0.3569576144218445,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 2.2536764705882355,
|
|
"grad_norm": 11.748537287955768,
|
|
"learning_rate": 1.7725559521273887e-06,
|
|
"loss": 0.5137014389038086,
|
|
"step": 4291
|
|
},
|
|
{
|
|
"epoch": 2.254201680672269,
|
|
"grad_norm": 8.644959563734952,
|
|
"learning_rate": 1.7702224565087629e-06,
|
|
"loss": 0.6743233799934387,
|
|
"step": 4292
|
|
},
|
|
{
|
|
"epoch": 2.2547268907563023,
|
|
"grad_norm": 7.622560298300759,
|
|
"learning_rate": 1.7678901674423044e-06,
|
|
"loss": 0.7572541832923889,
|
|
"step": 4293
|
|
},
|
|
{
|
|
"epoch": 2.255252100840336,
|
|
"grad_norm": 9.916966544236823,
|
|
"learning_rate": 1.765559085799291e-06,
|
|
"loss": 0.5181331634521484,
|
|
"step": 4294
|
|
},
|
|
{
|
|
"epoch": 2.25577731092437,
|
|
"grad_norm": 10.883205033840762,
|
|
"learning_rate": 1.7632292124505474e-06,
|
|
"loss": 0.7161169648170471,
|
|
"step": 4295
|
|
},
|
|
{
|
|
"epoch": 2.2563025210084033,
|
|
"grad_norm": 6.951025062262527,
|
|
"learning_rate": 1.7609005482664472e-06,
|
|
"loss": 0.2906675338745117,
|
|
"step": 4296
|
|
},
|
|
{
|
|
"epoch": 2.2568277310924367,
|
|
"grad_norm": 14.973160304477666,
|
|
"learning_rate": 1.7585730941169105e-06,
|
|
"loss": 0.6282626986503601,
|
|
"step": 4297
|
|
},
|
|
{
|
|
"epoch": 2.2573529411764706,
|
|
"grad_norm": 8.911523746568987,
|
|
"learning_rate": 1.7562468508714115e-06,
|
|
"loss": 0.40257585048675537,
|
|
"step": 4298
|
|
},
|
|
{
|
|
"epoch": 2.2578781512605044,
|
|
"grad_norm": 14.840054397955528,
|
|
"learning_rate": 1.753921819398966e-06,
|
|
"loss": 0.8833602666854858,
|
|
"step": 4299
|
|
},
|
|
{
|
|
"epoch": 2.258403361344538,
|
|
"grad_norm": 15.331435867589901,
|
|
"learning_rate": 1.7515980005681383e-06,
|
|
"loss": 0.3591822385787964,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 2.2589285714285716,
|
|
"grad_norm": 9.454405703715667,
|
|
"learning_rate": 1.7492753952470415e-06,
|
|
"loss": 0.36576658487319946,
|
|
"step": 4301
|
|
},
|
|
{
|
|
"epoch": 2.259453781512605,
|
|
"grad_norm": 12.652525405012328,
|
|
"learning_rate": 1.7469540043033335e-06,
|
|
"loss": 0.3571898639202118,
|
|
"step": 4302
|
|
},
|
|
{
|
|
"epoch": 2.259978991596639,
|
|
"grad_norm": 8.532771320904967,
|
|
"learning_rate": 1.7446338286042196e-06,
|
|
"loss": 0.5041860342025757,
|
|
"step": 4303
|
|
},
|
|
{
|
|
"epoch": 2.2605042016806722,
|
|
"grad_norm": 16.742160510813598,
|
|
"learning_rate": 1.7423148690164505e-06,
|
|
"loss": 0.4292744994163513,
|
|
"step": 4304
|
|
},
|
|
{
|
|
"epoch": 2.261029411764706,
|
|
"grad_norm": 8.742113836470347,
|
|
"learning_rate": 1.739997126406322e-06,
|
|
"loss": 0.13389720022678375,
|
|
"step": 4305
|
|
},
|
|
{
|
|
"epoch": 2.2615546218487395,
|
|
"grad_norm": 12.027532236582896,
|
|
"learning_rate": 1.7376806016396786e-06,
|
|
"loss": 0.7063834071159363,
|
|
"step": 4306
|
|
},
|
|
{
|
|
"epoch": 2.2620798319327733,
|
|
"grad_norm": 15.170850761266513,
|
|
"learning_rate": 1.7353652955819067e-06,
|
|
"loss": 0.2498408555984497,
|
|
"step": 4307
|
|
},
|
|
{
|
|
"epoch": 2.2626050420168067,
|
|
"grad_norm": 10.825787617819445,
|
|
"learning_rate": 1.7330512090979372e-06,
|
|
"loss": 0.6601535081863403,
|
|
"step": 4308
|
|
},
|
|
{
|
|
"epoch": 2.2631302521008405,
|
|
"grad_norm": 13.445506801696927,
|
|
"learning_rate": 1.7307383430522474e-06,
|
|
"loss": 0.7677643299102783,
|
|
"step": 4309
|
|
},
|
|
{
|
|
"epoch": 2.263655462184874,
|
|
"grad_norm": 10.403545817284883,
|
|
"learning_rate": 1.7284266983088565e-06,
|
|
"loss": 0.3086925446987152,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 2.2641806722689077,
|
|
"grad_norm": 6.837787702603852,
|
|
"learning_rate": 1.7261162757313299e-06,
|
|
"loss": 0.15964150428771973,
|
|
"step": 4311
|
|
},
|
|
{
|
|
"epoch": 2.264705882352941,
|
|
"grad_norm": 16.02081449997177,
|
|
"learning_rate": 1.7238070761827725e-06,
|
|
"loss": 0.5171663165092468,
|
|
"step": 4312
|
|
},
|
|
{
|
|
"epoch": 2.265231092436975,
|
|
"grad_norm": 15.381150793292788,
|
|
"learning_rate": 1.7214991005258386e-06,
|
|
"loss": 0.7723196744918823,
|
|
"step": 4313
|
|
},
|
|
{
|
|
"epoch": 2.2657563025210083,
|
|
"grad_norm": 15.114327333104479,
|
|
"learning_rate": 1.7191923496227203e-06,
|
|
"loss": 0.3939249813556671,
|
|
"step": 4314
|
|
},
|
|
{
|
|
"epoch": 2.266281512605042,
|
|
"grad_norm": 15.411326455149124,
|
|
"learning_rate": 1.7168868243351532e-06,
|
|
"loss": 0.36170852184295654,
|
|
"step": 4315
|
|
},
|
|
{
|
|
"epoch": 2.2668067226890756,
|
|
"grad_norm": 10.25117781012011,
|
|
"learning_rate": 1.7145825255244153e-06,
|
|
"loss": 0.47441792488098145,
|
|
"step": 4316
|
|
},
|
|
{
|
|
"epoch": 2.2673319327731094,
|
|
"grad_norm": 7.4714144047658735,
|
|
"learning_rate": 1.7122794540513265e-06,
|
|
"loss": 0.44890767335891724,
|
|
"step": 4317
|
|
},
|
|
{
|
|
"epoch": 2.267857142857143,
|
|
"grad_norm": 16.228638107624416,
|
|
"learning_rate": 1.7099776107762483e-06,
|
|
"loss": 0.7477537393569946,
|
|
"step": 4318
|
|
},
|
|
{
|
|
"epoch": 2.2683823529411766,
|
|
"grad_norm": 11.69767304812106,
|
|
"learning_rate": 1.7076769965590834e-06,
|
|
"loss": 0.4765632152557373,
|
|
"step": 4319
|
|
},
|
|
{
|
|
"epoch": 2.26890756302521,
|
|
"grad_norm": 9.308848236540593,
|
|
"learning_rate": 1.705377612259273e-06,
|
|
"loss": 0.2893710136413574,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 2.269432773109244,
|
|
"grad_norm": 8.555800306382825,
|
|
"learning_rate": 1.703079458735805e-06,
|
|
"loss": 0.3118830621242523,
|
|
"step": 4321
|
|
},
|
|
{
|
|
"epoch": 2.2699579831932772,
|
|
"grad_norm": 13.776481389860628,
|
|
"learning_rate": 1.700782536847202e-06,
|
|
"loss": 0.7710450291633606,
|
|
"step": 4322
|
|
},
|
|
{
|
|
"epoch": 2.270483193277311,
|
|
"grad_norm": 10.577928239660647,
|
|
"learning_rate": 1.698486847451527e-06,
|
|
"loss": 0.47262483835220337,
|
|
"step": 4323
|
|
},
|
|
{
|
|
"epoch": 2.2710084033613445,
|
|
"grad_norm": 13.069959610631283,
|
|
"learning_rate": 1.696192391406385e-06,
|
|
"loss": 0.5836432576179504,
|
|
"step": 4324
|
|
},
|
|
{
|
|
"epoch": 2.2715336134453783,
|
|
"grad_norm": 10.966580417486627,
|
|
"learning_rate": 1.6938991695689184e-06,
|
|
"loss": 0.6819464564323425,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 2.2720588235294117,
|
|
"grad_norm": 13.005761213457445,
|
|
"learning_rate": 1.6916071827958087e-06,
|
|
"loss": 0.33721476793289185,
|
|
"step": 4326
|
|
},
|
|
{
|
|
"epoch": 2.2725840336134455,
|
|
"grad_norm": 13.135660336334526,
|
|
"learning_rate": 1.6893164319432748e-06,
|
|
"loss": 0.5102297067642212,
|
|
"step": 4327
|
|
},
|
|
{
|
|
"epoch": 2.273109243697479,
|
|
"grad_norm": 8.064421370615745,
|
|
"learning_rate": 1.6870269178670795e-06,
|
|
"loss": 0.3952482342720032,
|
|
"step": 4328
|
|
},
|
|
{
|
|
"epoch": 2.2736344537815127,
|
|
"grad_norm": 10.792512616267555,
|
|
"learning_rate": 1.684738641422517e-06,
|
|
"loss": 0.7456547021865845,
|
|
"step": 4329
|
|
},
|
|
{
|
|
"epoch": 2.274159663865546,
|
|
"grad_norm": 15.020122157114749,
|
|
"learning_rate": 1.6824516034644217e-06,
|
|
"loss": 0.5874942541122437,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 2.27468487394958,
|
|
"grad_norm": 10.032837792324347,
|
|
"learning_rate": 1.6801658048471658e-06,
|
|
"loss": 0.6321033835411072,
|
|
"step": 4331
|
|
},
|
|
{
|
|
"epoch": 2.2752100840336134,
|
|
"grad_norm": 11.303584435137592,
|
|
"learning_rate": 1.677881246424658e-06,
|
|
"loss": 0.450790673494339,
|
|
"step": 4332
|
|
},
|
|
{
|
|
"epoch": 2.275735294117647,
|
|
"grad_norm": 9.847503120573618,
|
|
"learning_rate": 1.6755979290503437e-06,
|
|
"loss": 0.404061496257782,
|
|
"step": 4333
|
|
},
|
|
{
|
|
"epoch": 2.2762605042016806,
|
|
"grad_norm": 15.949321649438565,
|
|
"learning_rate": 1.6733158535772031e-06,
|
|
"loss": 0.3456595242023468,
|
|
"step": 4334
|
|
},
|
|
{
|
|
"epoch": 2.2767857142857144,
|
|
"grad_norm": 14.872070372220424,
|
|
"learning_rate": 1.671035020857757e-06,
|
|
"loss": 0.34505870938301086,
|
|
"step": 4335
|
|
},
|
|
{
|
|
"epoch": 2.277310924369748,
|
|
"grad_norm": 7.214427980667019,
|
|
"learning_rate": 1.6687554317440575e-06,
|
|
"loss": 0.3279365301132202,
|
|
"step": 4336
|
|
},
|
|
{
|
|
"epoch": 2.2778361344537816,
|
|
"grad_norm": 10.78146979921209,
|
|
"learning_rate": 1.666477087087694e-06,
|
|
"loss": 0.532626211643219,
|
|
"step": 4337
|
|
},
|
|
{
|
|
"epoch": 2.278361344537815,
|
|
"grad_norm": 7.522589992997947,
|
|
"learning_rate": 1.6641999877397903e-06,
|
|
"loss": 0.45383769273757935,
|
|
"step": 4338
|
|
},
|
|
{
|
|
"epoch": 2.278886554621849,
|
|
"grad_norm": 7.990300738402109,
|
|
"learning_rate": 1.6619241345510057e-06,
|
|
"loss": 0.3599814772605896,
|
|
"step": 4339
|
|
},
|
|
{
|
|
"epoch": 2.2794117647058822,
|
|
"grad_norm": 10.848762924233993,
|
|
"learning_rate": 1.659649528371533e-06,
|
|
"loss": 0.36083564162254333,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 2.279936974789916,
|
|
"grad_norm": 12.54704117742708,
|
|
"learning_rate": 1.6573761700511004e-06,
|
|
"loss": 0.3345023989677429,
|
|
"step": 4341
|
|
},
|
|
{
|
|
"epoch": 2.2804621848739495,
|
|
"grad_norm": 7.564376089304036,
|
|
"learning_rate": 1.6551040604389674e-06,
|
|
"loss": 0.3725637197494507,
|
|
"step": 4342
|
|
},
|
|
{
|
|
"epoch": 2.2809873949579833,
|
|
"grad_norm": 14.54050293277215,
|
|
"learning_rate": 1.6528332003839325e-06,
|
|
"loss": 0.3416447639465332,
|
|
"step": 4343
|
|
},
|
|
{
|
|
"epoch": 2.2815126050420167,
|
|
"grad_norm": 10.163078332156777,
|
|
"learning_rate": 1.6505635907343214e-06,
|
|
"loss": 0.39350181818008423,
|
|
"step": 4344
|
|
},
|
|
{
|
|
"epoch": 2.2820378151260505,
|
|
"grad_norm": 11.154794151475356,
|
|
"learning_rate": 1.6482952323379958e-06,
|
|
"loss": 0.7113596200942993,
|
|
"step": 4345
|
|
},
|
|
{
|
|
"epoch": 2.282563025210084,
|
|
"grad_norm": 14.418055790581414,
|
|
"learning_rate": 1.6460281260423495e-06,
|
|
"loss": 0.3519514203071594,
|
|
"step": 4346
|
|
},
|
|
{
|
|
"epoch": 2.2830882352941178,
|
|
"grad_norm": 14.520078892222791,
|
|
"learning_rate": 1.6437622726943076e-06,
|
|
"loss": 1.4245522022247314,
|
|
"step": 4347
|
|
},
|
|
{
|
|
"epoch": 2.283613445378151,
|
|
"grad_norm": 8.285158276756874,
|
|
"learning_rate": 1.6414976731403265e-06,
|
|
"loss": 0.41295862197875977,
|
|
"step": 4348
|
|
},
|
|
{
|
|
"epoch": 2.284138655462185,
|
|
"grad_norm": 7.240487739459167,
|
|
"learning_rate": 1.639234328226399e-06,
|
|
"loss": 0.28958454728126526,
|
|
"step": 4349
|
|
},
|
|
{
|
|
"epoch": 2.2846638655462184,
|
|
"grad_norm": 8.628130571430734,
|
|
"learning_rate": 1.6369722387980442e-06,
|
|
"loss": 0.29284539818763733,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 2.285189075630252,
|
|
"grad_norm": 6.934118703501691,
|
|
"learning_rate": 1.6347114057003116e-06,
|
|
"loss": 0.30714741349220276,
|
|
"step": 4351
|
|
},
|
|
{
|
|
"epoch": 2.2857142857142856,
|
|
"grad_norm": 9.935739273690379,
|
|
"learning_rate": 1.6324518297777875e-06,
|
|
"loss": 0.3020477592945099,
|
|
"step": 4352
|
|
},
|
|
{
|
|
"epoch": 2.2862394957983194,
|
|
"grad_norm": 10.933111882023026,
|
|
"learning_rate": 1.6301935118745826e-06,
|
|
"loss": 0.3865164518356323,
|
|
"step": 4353
|
|
},
|
|
{
|
|
"epoch": 2.286764705882353,
|
|
"grad_norm": 11.428036489735486,
|
|
"learning_rate": 1.62793645283434e-06,
|
|
"loss": 0.6368634104728699,
|
|
"step": 4354
|
|
},
|
|
{
|
|
"epoch": 2.2872899159663866,
|
|
"grad_norm": 16.517205459997992,
|
|
"learning_rate": 1.6256806535002312e-06,
|
|
"loss": 1.2036032676696777,
|
|
"step": 4355
|
|
},
|
|
{
|
|
"epoch": 2.28781512605042,
|
|
"grad_norm": 9.146910147728873,
|
|
"learning_rate": 1.6234261147149594e-06,
|
|
"loss": 0.28254234790802,
|
|
"step": 4356
|
|
},
|
|
{
|
|
"epoch": 2.288340336134454,
|
|
"grad_norm": 14.054019769884457,
|
|
"learning_rate": 1.621172837320754e-06,
|
|
"loss": 0.5170860290527344,
|
|
"step": 4357
|
|
},
|
|
{
|
|
"epoch": 2.2888655462184873,
|
|
"grad_norm": 13.45097323662491,
|
|
"learning_rate": 1.618920822159375e-06,
|
|
"loss": 0.32772767543792725,
|
|
"step": 4358
|
|
},
|
|
{
|
|
"epoch": 2.289390756302521,
|
|
"grad_norm": 13.691649496391307,
|
|
"learning_rate": 1.6166700700721121e-06,
|
|
"loss": 0.38849198818206787,
|
|
"step": 4359
|
|
},
|
|
{
|
|
"epoch": 2.2899159663865545,
|
|
"grad_norm": 12.1222124753655,
|
|
"learning_rate": 1.614420581899781e-06,
|
|
"loss": 0.3882126808166504,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 2.2904411764705883,
|
|
"grad_norm": 11.678439052954568,
|
|
"learning_rate": 1.6121723584827259e-06,
|
|
"loss": 0.5290379524230957,
|
|
"step": 4361
|
|
},
|
|
{
|
|
"epoch": 2.2909663865546217,
|
|
"grad_norm": 12.6188254512893,
|
|
"learning_rate": 1.6099254006608183e-06,
|
|
"loss": 0.19659966230392456,
|
|
"step": 4362
|
|
},
|
|
{
|
|
"epoch": 2.2914915966386555,
|
|
"grad_norm": 12.166327804570395,
|
|
"learning_rate": 1.6076797092734575e-06,
|
|
"loss": 0.3771727681159973,
|
|
"step": 4363
|
|
},
|
|
{
|
|
"epoch": 2.292016806722689,
|
|
"grad_norm": 15.895343917997305,
|
|
"learning_rate": 1.6054352851595684e-06,
|
|
"loss": 0.42266786098480225,
|
|
"step": 4364
|
|
},
|
|
{
|
|
"epoch": 2.2925420168067228,
|
|
"grad_norm": 13.12300565247586,
|
|
"learning_rate": 1.6031921291576048e-06,
|
|
"loss": 0.8594948649406433,
|
|
"step": 4365
|
|
},
|
|
{
|
|
"epoch": 2.293067226890756,
|
|
"grad_norm": 9.409440418091075,
|
|
"learning_rate": 1.6009502421055423e-06,
|
|
"loss": 0.3741492033004761,
|
|
"step": 4366
|
|
},
|
|
{
|
|
"epoch": 2.29359243697479,
|
|
"grad_norm": 19.970440709074943,
|
|
"learning_rate": 1.5987096248408896e-06,
|
|
"loss": 0.780980110168457,
|
|
"step": 4367
|
|
},
|
|
{
|
|
"epoch": 2.2941176470588234,
|
|
"grad_norm": 6.582649351131381,
|
|
"learning_rate": 1.5964702782006753e-06,
|
|
"loss": 0.20653480291366577,
|
|
"step": 4368
|
|
},
|
|
{
|
|
"epoch": 2.294642857142857,
|
|
"grad_norm": 15.809918568473499,
|
|
"learning_rate": 1.5942322030214547e-06,
|
|
"loss": 0.6179236173629761,
|
|
"step": 4369
|
|
},
|
|
{
|
|
"epoch": 2.2951680672268906,
|
|
"grad_norm": 12.307168966328556,
|
|
"learning_rate": 1.591995400139309e-06,
|
|
"loss": 0.4987037479877472,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 2.2956932773109244,
|
|
"grad_norm": 12.33854650104722,
|
|
"learning_rate": 1.5897598703898432e-06,
|
|
"loss": 0.6230236887931824,
|
|
"step": 4371
|
|
},
|
|
{
|
|
"epoch": 2.296218487394958,
|
|
"grad_norm": 14.145721467951539,
|
|
"learning_rate": 1.5875256146081868e-06,
|
|
"loss": 0.4220436215400696,
|
|
"step": 4372
|
|
},
|
|
{
|
|
"epoch": 2.2967436974789917,
|
|
"grad_norm": 13.867589347346152,
|
|
"learning_rate": 1.5852926336289926e-06,
|
|
"loss": 0.3709479570388794,
|
|
"step": 4373
|
|
},
|
|
{
|
|
"epoch": 2.297268907563025,
|
|
"grad_norm": 9.87653617656067,
|
|
"learning_rate": 1.5830609282864412e-06,
|
|
"loss": 0.3221050500869751,
|
|
"step": 4374
|
|
},
|
|
{
|
|
"epoch": 2.297794117647059,
|
|
"grad_norm": 9.950679674557886,
|
|
"learning_rate": 1.5808304994142315e-06,
|
|
"loss": 0.4723668396472931,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 2.2983193277310923,
|
|
"grad_norm": 12.8159080605527,
|
|
"learning_rate": 1.578601347845588e-06,
|
|
"loss": 0.5835788249969482,
|
|
"step": 4376
|
|
},
|
|
{
|
|
"epoch": 2.298844537815126,
|
|
"grad_norm": 10.218066774260638,
|
|
"learning_rate": 1.5763734744132587e-06,
|
|
"loss": 0.3129946291446686,
|
|
"step": 4377
|
|
},
|
|
{
|
|
"epoch": 2.2993697478991595,
|
|
"grad_norm": 9.852644863950895,
|
|
"learning_rate": 1.5741468799495112e-06,
|
|
"loss": 0.25117218494415283,
|
|
"step": 4378
|
|
},
|
|
{
|
|
"epoch": 2.2998949579831933,
|
|
"grad_norm": 14.071437665169947,
|
|
"learning_rate": 1.571921565286139e-06,
|
|
"loss": 0.5350000858306885,
|
|
"step": 4379
|
|
},
|
|
{
|
|
"epoch": 2.3004201680672267,
|
|
"grad_norm": 16.766135653846202,
|
|
"learning_rate": 1.5696975312544532e-06,
|
|
"loss": 0.3731675148010254,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 2.3009453781512605,
|
|
"grad_norm": 12.037650977090113,
|
|
"learning_rate": 1.5674747786852935e-06,
|
|
"loss": 0.43751031160354614,
|
|
"step": 4381
|
|
},
|
|
{
|
|
"epoch": 2.301470588235294,
|
|
"grad_norm": 11.089661880105295,
|
|
"learning_rate": 1.5652533084090126e-06,
|
|
"loss": 0.4016155004501343,
|
|
"step": 4382
|
|
},
|
|
{
|
|
"epoch": 2.3019957983193278,
|
|
"grad_norm": 10.943201933863106,
|
|
"learning_rate": 1.5630331212554906e-06,
|
|
"loss": 0.8298485279083252,
|
|
"step": 4383
|
|
},
|
|
{
|
|
"epoch": 2.302521008403361,
|
|
"grad_norm": 9.14803865518618,
|
|
"learning_rate": 1.5608142180541236e-06,
|
|
"loss": 0.25823837518692017,
|
|
"step": 4384
|
|
},
|
|
{
|
|
"epoch": 2.303046218487395,
|
|
"grad_norm": 8.04483506782103,
|
|
"learning_rate": 1.5585965996338314e-06,
|
|
"loss": 0.3800016939640045,
|
|
"step": 4385
|
|
},
|
|
{
|
|
"epoch": 2.3035714285714284,
|
|
"grad_norm": 10.034853433108301,
|
|
"learning_rate": 1.5563802668230522e-06,
|
|
"loss": 0.48947906494140625,
|
|
"step": 4386
|
|
},
|
|
{
|
|
"epoch": 2.304096638655462,
|
|
"grad_norm": 12.286004901127377,
|
|
"learning_rate": 1.5541652204497443e-06,
|
|
"loss": 0.7934751510620117,
|
|
"step": 4387
|
|
},
|
|
{
|
|
"epoch": 2.3046218487394956,
|
|
"grad_norm": 9.313521210545806,
|
|
"learning_rate": 1.5519514613413832e-06,
|
|
"loss": 0.3479290306568146,
|
|
"step": 4388
|
|
},
|
|
{
|
|
"epoch": 2.3051470588235294,
|
|
"grad_norm": 9.705513405157093,
|
|
"learning_rate": 1.5497389903249705e-06,
|
|
"loss": 0.5358251333236694,
|
|
"step": 4389
|
|
},
|
|
{
|
|
"epoch": 2.3056722689075633,
|
|
"grad_norm": 10.167095469659124,
|
|
"learning_rate": 1.5475278082270185e-06,
|
|
"loss": 0.33251112699508667,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 2.3061974789915967,
|
|
"grad_norm": 12.219389713798087,
|
|
"learning_rate": 1.5453179158735626e-06,
|
|
"loss": 0.6672162413597107,
|
|
"step": 4391
|
|
},
|
|
{
|
|
"epoch": 2.30672268907563,
|
|
"grad_norm": 17.70724727180953,
|
|
"learning_rate": 1.5431093140901548e-06,
|
|
"loss": 0.3237547278404236,
|
|
"step": 4392
|
|
},
|
|
{
|
|
"epoch": 2.307247899159664,
|
|
"grad_norm": 8.29131756283089,
|
|
"learning_rate": 1.5409020037018652e-06,
|
|
"loss": 0.4832186996936798,
|
|
"step": 4393
|
|
},
|
|
{
|
|
"epoch": 2.3077731092436977,
|
|
"grad_norm": 11.148583584479832,
|
|
"learning_rate": 1.538695985533281e-06,
|
|
"loss": 0.8950933218002319,
|
|
"step": 4394
|
|
},
|
|
{
|
|
"epoch": 2.308298319327731,
|
|
"grad_norm": 15.460688906601312,
|
|
"learning_rate": 1.536491260408507e-06,
|
|
"loss": 0.5262115597724915,
|
|
"step": 4395
|
|
},
|
|
{
|
|
"epoch": 2.3088235294117645,
|
|
"grad_norm": 16.68986140644925,
|
|
"learning_rate": 1.5342878291511675e-06,
|
|
"loss": 1.0260121822357178,
|
|
"step": 4396
|
|
},
|
|
{
|
|
"epoch": 2.3093487394957983,
|
|
"grad_norm": 7.571106561965695,
|
|
"learning_rate": 1.5320856925843997e-06,
|
|
"loss": 0.1923689842224121,
|
|
"step": 4397
|
|
},
|
|
{
|
|
"epoch": 2.309873949579832,
|
|
"grad_norm": 11.044530209518856,
|
|
"learning_rate": 1.5298848515308584e-06,
|
|
"loss": 0.31885623931884766,
|
|
"step": 4398
|
|
},
|
|
{
|
|
"epoch": 2.3103991596638656,
|
|
"grad_norm": 8.849088027270751,
|
|
"learning_rate": 1.527685306812715e-06,
|
|
"loss": 0.4185711741447449,
|
|
"step": 4399
|
|
},
|
|
{
|
|
"epoch": 2.310924369747899,
|
|
"grad_norm": 11.187801148523693,
|
|
"learning_rate": 1.5254870592516569e-06,
|
|
"loss": 0.4159872233867645,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 2.3114495798319328,
|
|
"grad_norm": 9.827097352712677,
|
|
"learning_rate": 1.5232901096688847e-06,
|
|
"loss": 0.3779459595680237,
|
|
"step": 4401
|
|
},
|
|
{
|
|
"epoch": 2.3119747899159666,
|
|
"grad_norm": 12.91796680900067,
|
|
"learning_rate": 1.5210944588851168e-06,
|
|
"loss": 0.44369253516197205,
|
|
"step": 4402
|
|
},
|
|
{
|
|
"epoch": 2.3125,
|
|
"grad_norm": 11.462757793812512,
|
|
"learning_rate": 1.5189001077205835e-06,
|
|
"loss": 0.39783555269241333,
|
|
"step": 4403
|
|
},
|
|
{
|
|
"epoch": 2.3130252100840334,
|
|
"grad_norm": 8.897084326786889,
|
|
"learning_rate": 1.5167070569950344e-06,
|
|
"loss": 0.46692177653312683,
|
|
"step": 4404
|
|
},
|
|
{
|
|
"epoch": 2.3135504201680672,
|
|
"grad_norm": 14.093689957647248,
|
|
"learning_rate": 1.5145153075277286e-06,
|
|
"loss": 0.7288868427276611,
|
|
"step": 4405
|
|
},
|
|
{
|
|
"epoch": 2.314075630252101,
|
|
"grad_norm": 10.454799310162347,
|
|
"learning_rate": 1.5123248601374413e-06,
|
|
"loss": 0.615166425704956,
|
|
"step": 4406
|
|
},
|
|
{
|
|
"epoch": 2.3146008403361344,
|
|
"grad_norm": 7.12706837617487,
|
|
"learning_rate": 1.5101357156424601e-06,
|
|
"loss": 0.20782826840877533,
|
|
"step": 4407
|
|
},
|
|
{
|
|
"epoch": 2.315126050420168,
|
|
"grad_norm": 11.74774688766439,
|
|
"learning_rate": 1.5079478748605874e-06,
|
|
"loss": 0.3646509051322937,
|
|
"step": 4408
|
|
},
|
|
{
|
|
"epoch": 2.3156512605042017,
|
|
"grad_norm": 11.905856892317097,
|
|
"learning_rate": 1.505761338609137e-06,
|
|
"loss": 0.47605645656585693,
|
|
"step": 4409
|
|
},
|
|
{
|
|
"epoch": 2.3161764705882355,
|
|
"grad_norm": 15.026179011873387,
|
|
"learning_rate": 1.5035761077049344e-06,
|
|
"loss": 0.7398310303688049,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 2.316701680672269,
|
|
"grad_norm": 10.281670323887347,
|
|
"learning_rate": 1.501392182964323e-06,
|
|
"loss": 0.20230919122695923,
|
|
"step": 4411
|
|
},
|
|
{
|
|
"epoch": 2.3172268907563023,
|
|
"grad_norm": 11.672565647048748,
|
|
"learning_rate": 1.4992095652031518e-06,
|
|
"loss": 0.8576836585998535,
|
|
"step": 4412
|
|
},
|
|
{
|
|
"epoch": 2.317752100840336,
|
|
"grad_norm": 15.679845450616638,
|
|
"learning_rate": 1.4970282552367854e-06,
|
|
"loss": 0.3027660846710205,
|
|
"step": 4413
|
|
},
|
|
{
|
|
"epoch": 2.31827731092437,
|
|
"grad_norm": 10.589700262780413,
|
|
"learning_rate": 1.4948482538800974e-06,
|
|
"loss": 0.28417763113975525,
|
|
"step": 4414
|
|
},
|
|
{
|
|
"epoch": 2.3188025210084033,
|
|
"grad_norm": 8.405692327071685,
|
|
"learning_rate": 1.4926695619474747e-06,
|
|
"loss": 0.3037428855895996,
|
|
"step": 4415
|
|
},
|
|
{
|
|
"epoch": 2.3193277310924367,
|
|
"grad_norm": 13.642124447538226,
|
|
"learning_rate": 1.4904921802528133e-06,
|
|
"loss": 0.52169269323349,
|
|
"step": 4416
|
|
},
|
|
{
|
|
"epoch": 2.3198529411764706,
|
|
"grad_norm": 12.377939837406828,
|
|
"learning_rate": 1.4883161096095189e-06,
|
|
"loss": 0.9864881038665771,
|
|
"step": 4417
|
|
},
|
|
{
|
|
"epoch": 2.3203781512605044,
|
|
"grad_norm": 10.41779799698978,
|
|
"learning_rate": 1.4861413508305128e-06,
|
|
"loss": 0.4198512136936188,
|
|
"step": 4418
|
|
},
|
|
{
|
|
"epoch": 2.320903361344538,
|
|
"grad_norm": 10.179067127791503,
|
|
"learning_rate": 1.4839679047282206e-06,
|
|
"loss": 0.34290653467178345,
|
|
"step": 4419
|
|
},
|
|
{
|
|
"epoch": 2.3214285714285716,
|
|
"grad_norm": 7.925017466820183,
|
|
"learning_rate": 1.4817957721145793e-06,
|
|
"loss": 0.7075372934341431,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 2.321953781512605,
|
|
"grad_norm": 6.103286752754161,
|
|
"learning_rate": 1.4796249538010354e-06,
|
|
"loss": 0.3007611334323883,
|
|
"step": 4421
|
|
},
|
|
{
|
|
"epoch": 2.322478991596639,
|
|
"grad_norm": 12.25961755977851,
|
|
"learning_rate": 1.477455450598544e-06,
|
|
"loss": 0.4300242066383362,
|
|
"step": 4422
|
|
},
|
|
{
|
|
"epoch": 2.3230042016806722,
|
|
"grad_norm": 8.699434886780793,
|
|
"learning_rate": 1.4752872633175691e-06,
|
|
"loss": 0.4099404811859131,
|
|
"step": 4423
|
|
},
|
|
{
|
|
"epoch": 2.323529411764706,
|
|
"grad_norm": 10.741312045169979,
|
|
"learning_rate": 1.4731203927680842e-06,
|
|
"loss": 0.5882526636123657,
|
|
"step": 4424
|
|
},
|
|
{
|
|
"epoch": 2.3240546218487395,
|
|
"grad_norm": 9.297351370478692,
|
|
"learning_rate": 1.4709548397595674e-06,
|
|
"loss": 0.5598904490470886,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 2.3245798319327733,
|
|
"grad_norm": 13.838742636903982,
|
|
"learning_rate": 1.4687906051010103e-06,
|
|
"loss": 0.6112613677978516,
|
|
"step": 4426
|
|
},
|
|
{
|
|
"epoch": 2.3251050420168067,
|
|
"grad_norm": 6.7243179871563425,
|
|
"learning_rate": 1.4666276896009079e-06,
|
|
"loss": 0.2332310527563095,
|
|
"step": 4427
|
|
},
|
|
{
|
|
"epoch": 2.3256302521008405,
|
|
"grad_norm": 10.971361557829512,
|
|
"learning_rate": 1.4644660940672628e-06,
|
|
"loss": 0.3349398970603943,
|
|
"step": 4428
|
|
},
|
|
{
|
|
"epoch": 2.326155462184874,
|
|
"grad_norm": 9.07198473960286,
|
|
"learning_rate": 1.4623058193075852e-06,
|
|
"loss": 1.245658040046692,
|
|
"step": 4429
|
|
},
|
|
{
|
|
"epoch": 2.3266806722689077,
|
|
"grad_norm": 15.482122106635144,
|
|
"learning_rate": 1.460146866128892e-06,
|
|
"loss": 0.2873179316520691,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 2.327205882352941,
|
|
"grad_norm": 11.523410821172185,
|
|
"learning_rate": 1.4579892353377055e-06,
|
|
"loss": 0.5670019388198853,
|
|
"step": 4431
|
|
},
|
|
{
|
|
"epoch": 2.327731092436975,
|
|
"grad_norm": 8.663789278131366,
|
|
"learning_rate": 1.4558329277400535e-06,
|
|
"loss": 0.11815594136714935,
|
|
"step": 4432
|
|
},
|
|
{
|
|
"epoch": 2.3282563025210083,
|
|
"grad_norm": 10.147117036298072,
|
|
"learning_rate": 1.453677944141474e-06,
|
|
"loss": 0.18382048606872559,
|
|
"step": 4433
|
|
},
|
|
{
|
|
"epoch": 2.328781512605042,
|
|
"grad_norm": 11.139442297776682,
|
|
"learning_rate": 1.4515242853470047e-06,
|
|
"loss": 1.187211275100708,
|
|
"step": 4434
|
|
},
|
|
{
|
|
"epoch": 2.3293067226890756,
|
|
"grad_norm": 7.022775976264377,
|
|
"learning_rate": 1.449371952161191e-06,
|
|
"loss": 0.26034629344940186,
|
|
"step": 4435
|
|
},
|
|
{
|
|
"epoch": 2.3298319327731094,
|
|
"grad_norm": 8.971793472401474,
|
|
"learning_rate": 1.4472209453880831e-06,
|
|
"loss": 0.992083728313446,
|
|
"step": 4436
|
|
},
|
|
{
|
|
"epoch": 2.330357142857143,
|
|
"grad_norm": 14.57673993175187,
|
|
"learning_rate": 1.4450712658312356e-06,
|
|
"loss": 0.3485121726989746,
|
|
"step": 4437
|
|
},
|
|
{
|
|
"epoch": 2.3308823529411766,
|
|
"grad_norm": 10.92881401042016,
|
|
"learning_rate": 1.4429229142937062e-06,
|
|
"loss": 0.29710543155670166,
|
|
"step": 4438
|
|
},
|
|
{
|
|
"epoch": 2.33140756302521,
|
|
"grad_norm": 5.696441023658666,
|
|
"learning_rate": 1.4407758915780578e-06,
|
|
"loss": 0.19231635332107544,
|
|
"step": 4439
|
|
},
|
|
{
|
|
"epoch": 2.331932773109244,
|
|
"grad_norm": 12.725226143271817,
|
|
"learning_rate": 1.4386301984863548e-06,
|
|
"loss": 0.7229375839233398,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 2.3324579831932772,
|
|
"grad_norm": 11.279211075275676,
|
|
"learning_rate": 1.43648583582017e-06,
|
|
"loss": 0.4464646577835083,
|
|
"step": 4441
|
|
},
|
|
{
|
|
"epoch": 2.332983193277311,
|
|
"grad_norm": 10.586498248344075,
|
|
"learning_rate": 1.4343428043805734e-06,
|
|
"loss": 0.2856397330760956,
|
|
"step": 4442
|
|
},
|
|
{
|
|
"epoch": 2.3335084033613445,
|
|
"grad_norm": 8.610565290612234,
|
|
"learning_rate": 1.432201104968141e-06,
|
|
"loss": 0.6283878087997437,
|
|
"step": 4443
|
|
},
|
|
{
|
|
"epoch": 2.3340336134453783,
|
|
"grad_norm": 12.3250343639634,
|
|
"learning_rate": 1.4300607383829495e-06,
|
|
"loss": 0.3797636032104492,
|
|
"step": 4444
|
|
},
|
|
{
|
|
"epoch": 2.3345588235294117,
|
|
"grad_norm": 15.96257785335536,
|
|
"learning_rate": 1.4279217054245793e-06,
|
|
"loss": 0.6095945835113525,
|
|
"step": 4445
|
|
},
|
|
{
|
|
"epoch": 2.3350840336134455,
|
|
"grad_norm": 10.161222958965807,
|
|
"learning_rate": 1.4257840068921103e-06,
|
|
"loss": 0.2816503942012787,
|
|
"step": 4446
|
|
},
|
|
{
|
|
"epoch": 2.335609243697479,
|
|
"grad_norm": 13.669656851961088,
|
|
"learning_rate": 1.423647643584125e-06,
|
|
"loss": 0.26294994354248047,
|
|
"step": 4447
|
|
},
|
|
{
|
|
"epoch": 2.3361344537815127,
|
|
"grad_norm": 12.89727894304432,
|
|
"learning_rate": 1.4215126162987097e-06,
|
|
"loss": 0.49291765689849854,
|
|
"step": 4448
|
|
},
|
|
{
|
|
"epoch": 2.336659663865546,
|
|
"grad_norm": 10.492355561706045,
|
|
"learning_rate": 1.4193789258334485e-06,
|
|
"loss": 0.24138548970222473,
|
|
"step": 4449
|
|
},
|
|
{
|
|
"epoch": 2.33718487394958,
|
|
"grad_norm": 8.957404872028208,
|
|
"learning_rate": 1.4172465729854262e-06,
|
|
"loss": 0.5285111665725708,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 2.3377100840336134,
|
|
"grad_norm": 5.983758652579017,
|
|
"learning_rate": 1.4151155585512288e-06,
|
|
"loss": 0.13447096943855286,
|
|
"step": 4451
|
|
},
|
|
{
|
|
"epoch": 2.338235294117647,
|
|
"grad_norm": 7.098674025642296,
|
|
"learning_rate": 1.4129858833269422e-06,
|
|
"loss": 0.288321852684021,
|
|
"step": 4452
|
|
},
|
|
{
|
|
"epoch": 2.3387605042016806,
|
|
"grad_norm": 18.10725020355562,
|
|
"learning_rate": 1.4108575481081522e-06,
|
|
"loss": 0.5799267888069153,
|
|
"step": 4453
|
|
},
|
|
{
|
|
"epoch": 2.3392857142857144,
|
|
"grad_norm": 8.769690651248249,
|
|
"learning_rate": 1.4087305536899415e-06,
|
|
"loss": 0.7320141792297363,
|
|
"step": 4454
|
|
},
|
|
{
|
|
"epoch": 2.339810924369748,
|
|
"grad_norm": 8.790205240868099,
|
|
"learning_rate": 1.406604900866898e-06,
|
|
"loss": 0.8309720158576965,
|
|
"step": 4455
|
|
},
|
|
{
|
|
"epoch": 2.3403361344537816,
|
|
"grad_norm": 11.898972273798018,
|
|
"learning_rate": 1.404480590433102e-06,
|
|
"loss": 0.8555180430412292,
|
|
"step": 4456
|
|
},
|
|
{
|
|
"epoch": 2.340861344537815,
|
|
"grad_norm": 8.95813991738779,
|
|
"learning_rate": 1.4023576231821362e-06,
|
|
"loss": 0.5222865343093872,
|
|
"step": 4457
|
|
},
|
|
{
|
|
"epoch": 2.341386554621849,
|
|
"grad_norm": 9.160181655448328,
|
|
"learning_rate": 1.4002359999070797e-06,
|
|
"loss": 0.6912654638290405,
|
|
"step": 4458
|
|
},
|
|
{
|
|
"epoch": 2.3419117647058822,
|
|
"grad_norm": 9.851244147965717,
|
|
"learning_rate": 1.3981157214005098e-06,
|
|
"loss": 0.36685875058174133,
|
|
"step": 4459
|
|
},
|
|
{
|
|
"epoch": 2.342436974789916,
|
|
"grad_norm": 8.409085857554166,
|
|
"learning_rate": 1.3959967884545016e-06,
|
|
"loss": 0.16158661246299744,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 2.3429621848739495,
|
|
"grad_norm": 10.50898618550427,
|
|
"learning_rate": 1.3938792018606278e-06,
|
|
"loss": 0.6709229946136475,
|
|
"step": 4461
|
|
},
|
|
{
|
|
"epoch": 2.3434873949579833,
|
|
"grad_norm": 11.521710950342648,
|
|
"learning_rate": 1.391762962409957e-06,
|
|
"loss": 0.36330825090408325,
|
|
"step": 4462
|
|
},
|
|
{
|
|
"epoch": 2.3440126050420167,
|
|
"grad_norm": 10.648231431598703,
|
|
"learning_rate": 1.3896480708930576e-06,
|
|
"loss": 0.45184430480003357,
|
|
"step": 4463
|
|
},
|
|
{
|
|
"epoch": 2.3445378151260505,
|
|
"grad_norm": 10.719388883213727,
|
|
"learning_rate": 1.3875345280999913e-06,
|
|
"loss": 0.6399360299110413,
|
|
"step": 4464
|
|
},
|
|
{
|
|
"epoch": 2.345063025210084,
|
|
"grad_norm": 11.089626478412704,
|
|
"learning_rate": 1.3854223348203171e-06,
|
|
"loss": 0.660666823387146,
|
|
"step": 4465
|
|
},
|
|
{
|
|
"epoch": 2.3455882352941178,
|
|
"grad_norm": 8.80264843882407,
|
|
"learning_rate": 1.3833114918430896e-06,
|
|
"loss": 0.2146662175655365,
|
|
"step": 4466
|
|
},
|
|
{
|
|
"epoch": 2.346113445378151,
|
|
"grad_norm": 10.304086170519128,
|
|
"learning_rate": 1.3812019999568588e-06,
|
|
"loss": 0.5874398350715637,
|
|
"step": 4467
|
|
},
|
|
{
|
|
"epoch": 2.346638655462185,
|
|
"grad_norm": 10.695658890377118,
|
|
"learning_rate": 1.3790938599496712e-06,
|
|
"loss": 0.37619832158088684,
|
|
"step": 4468
|
|
},
|
|
{
|
|
"epoch": 2.3471638655462184,
|
|
"grad_norm": 12.79180398066583,
|
|
"learning_rate": 1.376987072609065e-06,
|
|
"loss": 1.54884934425354,
|
|
"step": 4469
|
|
},
|
|
{
|
|
"epoch": 2.347689075630252,
|
|
"grad_norm": 16.510955416054422,
|
|
"learning_rate": 1.3748816387220787e-06,
|
|
"loss": 0.5533466339111328,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 2.3482142857142856,
|
|
"grad_norm": 11.831295081158444,
|
|
"learning_rate": 1.3727775590752413e-06,
|
|
"loss": 0.44226396083831787,
|
|
"step": 4471
|
|
},
|
|
{
|
|
"epoch": 2.3487394957983194,
|
|
"grad_norm": 12.238349594637782,
|
|
"learning_rate": 1.370674834454575e-06,
|
|
"loss": 0.7285857796669006,
|
|
"step": 4472
|
|
},
|
|
{
|
|
"epoch": 2.349264705882353,
|
|
"grad_norm": 7.36240551837595,
|
|
"learning_rate": 1.368573465645599e-06,
|
|
"loss": 0.19481465220451355,
|
|
"step": 4473
|
|
},
|
|
{
|
|
"epoch": 2.3497899159663866,
|
|
"grad_norm": 13.509107523405708,
|
|
"learning_rate": 1.366473453433323e-06,
|
|
"loss": 0.27436989545822144,
|
|
"step": 4474
|
|
},
|
|
{
|
|
"epoch": 2.35031512605042,
|
|
"grad_norm": 11.84939142092099,
|
|
"learning_rate": 1.3643747986022521e-06,
|
|
"loss": 0.5133126974105835,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 2.350840336134454,
|
|
"grad_norm": 8.796568434767028,
|
|
"learning_rate": 1.3622775019363827e-06,
|
|
"loss": 0.4648074209690094,
|
|
"step": 4476
|
|
},
|
|
{
|
|
"epoch": 2.3513655462184873,
|
|
"grad_norm": 9.094964859815823,
|
|
"learning_rate": 1.360181564219204e-06,
|
|
"loss": 0.4042467772960663,
|
|
"step": 4477
|
|
},
|
|
{
|
|
"epoch": 2.351890756302521,
|
|
"grad_norm": 8.319322170075091,
|
|
"learning_rate": 1.358086986233701e-06,
|
|
"loss": 0.5059428215026855,
|
|
"step": 4478
|
|
},
|
|
{
|
|
"epoch": 2.3524159663865545,
|
|
"grad_norm": 9.77888075804398,
|
|
"learning_rate": 1.3559937687623458e-06,
|
|
"loss": 0.6230376362800598,
|
|
"step": 4479
|
|
},
|
|
{
|
|
"epoch": 2.3529411764705883,
|
|
"grad_norm": 10.28907790673492,
|
|
"learning_rate": 1.3539019125871057e-06,
|
|
"loss": 0.6566630005836487,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 2.3534663865546217,
|
|
"grad_norm": 11.750925105401295,
|
|
"learning_rate": 1.351811418489436e-06,
|
|
"loss": 0.41594552993774414,
|
|
"step": 4481
|
|
},
|
|
{
|
|
"epoch": 2.3539915966386555,
|
|
"grad_norm": 15.97060569749755,
|
|
"learning_rate": 1.3497222872502868e-06,
|
|
"loss": 0.23841039836406708,
|
|
"step": 4482
|
|
},
|
|
{
|
|
"epoch": 2.354516806722689,
|
|
"grad_norm": 9.445741091086223,
|
|
"learning_rate": 1.3476345196500978e-06,
|
|
"loss": 0.5826966166496277,
|
|
"step": 4483
|
|
},
|
|
{
|
|
"epoch": 2.3550420168067228,
|
|
"grad_norm": 10.5760152407604,
|
|
"learning_rate": 1.3455481164687967e-06,
|
|
"loss": 0.32878589630126953,
|
|
"step": 4484
|
|
},
|
|
{
|
|
"epoch": 2.355567226890756,
|
|
"grad_norm": 7.098752200527344,
|
|
"learning_rate": 1.3434630784858067e-06,
|
|
"loss": 0.5044801235198975,
|
|
"step": 4485
|
|
},
|
|
{
|
|
"epoch": 2.35609243697479,
|
|
"grad_norm": 14.220339734493516,
|
|
"learning_rate": 1.3413794064800373e-06,
|
|
"loss": 0.45580434799194336,
|
|
"step": 4486
|
|
},
|
|
{
|
|
"epoch": 2.3566176470588234,
|
|
"grad_norm": 8.36565755090009,
|
|
"learning_rate": 1.3392971012298883e-06,
|
|
"loss": 0.37136155366897583,
|
|
"step": 4487
|
|
},
|
|
{
|
|
"epoch": 2.357142857142857,
|
|
"grad_norm": 10.728025326411071,
|
|
"learning_rate": 1.3372161635132486e-06,
|
|
"loss": 0.2327875792980194,
|
|
"step": 4488
|
|
},
|
|
{
|
|
"epoch": 2.3576680672268906,
|
|
"grad_norm": 9.83698742983316,
|
|
"learning_rate": 1.335136594107498e-06,
|
|
"loss": 0.36251547932624817,
|
|
"step": 4489
|
|
},
|
|
{
|
|
"epoch": 2.3581932773109244,
|
|
"grad_norm": 10.826874145764695,
|
|
"learning_rate": 1.333058393789503e-06,
|
|
"loss": 0.24773839116096497,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 2.358718487394958,
|
|
"grad_norm": 13.734248774530457,
|
|
"learning_rate": 1.3309815633356181e-06,
|
|
"loss": 0.5755770206451416,
|
|
"step": 4491
|
|
},
|
|
{
|
|
"epoch": 2.3592436974789917,
|
|
"grad_norm": 12.563413992488053,
|
|
"learning_rate": 1.328906103521691e-06,
|
|
"loss": 0.4366951882839203,
|
|
"step": 4492
|
|
},
|
|
{
|
|
"epoch": 2.359768907563025,
|
|
"grad_norm": 8.78774750837823,
|
|
"learning_rate": 1.3268320151230518e-06,
|
|
"loss": 1.001713514328003,
|
|
"step": 4493
|
|
},
|
|
{
|
|
"epoch": 2.360294117647059,
|
|
"grad_norm": 15.69595928553154,
|
|
"learning_rate": 1.3247592989145213e-06,
|
|
"loss": 1.3247722387313843,
|
|
"step": 4494
|
|
},
|
|
{
|
|
"epoch": 2.3608193277310923,
|
|
"grad_norm": 7.494338259169056,
|
|
"learning_rate": 1.322687955670406e-06,
|
|
"loss": 0.443425714969635,
|
|
"step": 4495
|
|
},
|
|
{
|
|
"epoch": 2.361344537815126,
|
|
"grad_norm": 12.271734786939172,
|
|
"learning_rate": 1.3206179861645003e-06,
|
|
"loss": 0.8458099365234375,
|
|
"step": 4496
|
|
},
|
|
{
|
|
"epoch": 2.3618697478991595,
|
|
"grad_norm": 8.775425752530367,
|
|
"learning_rate": 1.3185493911700854e-06,
|
|
"loss": 0.4446178674697876,
|
|
"step": 4497
|
|
},
|
|
{
|
|
"epoch": 2.3623949579831933,
|
|
"grad_norm": 16.956798719350704,
|
|
"learning_rate": 1.3164821714599296e-06,
|
|
"loss": 1.2176728248596191,
|
|
"step": 4498
|
|
},
|
|
{
|
|
"epoch": 2.3629201680672267,
|
|
"grad_norm": 12.932713028329136,
|
|
"learning_rate": 1.3144163278062848e-06,
|
|
"loss": 0.21005836129188538,
|
|
"step": 4499
|
|
},
|
|
{
|
|
"epoch": 2.3634453781512605,
|
|
"grad_norm": 11.801717284369776,
|
|
"learning_rate": 1.3123518609808938e-06,
|
|
"loss": 0.6239449381828308,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 2.363970588235294,
|
|
"grad_norm": 8.704409848054516,
|
|
"learning_rate": 1.3102887717549812e-06,
|
|
"loss": 0.33125782012939453,
|
|
"step": 4501
|
|
},
|
|
{
|
|
"epoch": 2.3644957983193278,
|
|
"grad_norm": 10.073395539635998,
|
|
"learning_rate": 1.308227060899257e-06,
|
|
"loss": 0.4214559495449066,
|
|
"step": 4502
|
|
},
|
|
{
|
|
"epoch": 2.365021008403361,
|
|
"grad_norm": 17.29668995113468,
|
|
"learning_rate": 1.3061667291839182e-06,
|
|
"loss": 0.3144763112068176,
|
|
"step": 4503
|
|
},
|
|
{
|
|
"epoch": 2.365546218487395,
|
|
"grad_norm": 12.387739143754072,
|
|
"learning_rate": 1.3041077773786448e-06,
|
|
"loss": 0.39141130447387695,
|
|
"step": 4504
|
|
},
|
|
{
|
|
"epoch": 2.3660714285714284,
|
|
"grad_norm": 9.140243501641983,
|
|
"learning_rate": 1.302050206252602e-06,
|
|
"loss": 0.39588499069213867,
|
|
"step": 4505
|
|
},
|
|
{
|
|
"epoch": 2.366596638655462,
|
|
"grad_norm": 8.744832052666892,
|
|
"learning_rate": 1.299994016574439e-06,
|
|
"loss": 0.3863181471824646,
|
|
"step": 4506
|
|
},
|
|
{
|
|
"epoch": 2.3671218487394956,
|
|
"grad_norm": 10.049890083498987,
|
|
"learning_rate": 1.297939209112291e-06,
|
|
"loss": 0.3938748240470886,
|
|
"step": 4507
|
|
},
|
|
{
|
|
"epoch": 2.3676470588235294,
|
|
"grad_norm": 9.192739529161791,
|
|
"learning_rate": 1.295885784633774e-06,
|
|
"loss": 0.7387629151344299,
|
|
"step": 4508
|
|
},
|
|
{
|
|
"epoch": 2.3681722689075633,
|
|
"grad_norm": 11.25074840431641,
|
|
"learning_rate": 1.2938337439059868e-06,
|
|
"loss": 0.9342288374900818,
|
|
"step": 4509
|
|
},
|
|
{
|
|
"epoch": 2.3686974789915967,
|
|
"grad_norm": 8.787393353785076,
|
|
"learning_rate": 1.2917830876955161e-06,
|
|
"loss": 0.4711243808269501,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 2.36922268907563,
|
|
"grad_norm": 10.92742341460336,
|
|
"learning_rate": 1.289733816768427e-06,
|
|
"loss": 0.3783642649650574,
|
|
"step": 4511
|
|
},
|
|
{
|
|
"epoch": 2.369747899159664,
|
|
"grad_norm": 10.595865389158499,
|
|
"learning_rate": 1.2876859318902673e-06,
|
|
"loss": 0.4414080083370209,
|
|
"step": 4512
|
|
},
|
|
{
|
|
"epoch": 2.3702731092436977,
|
|
"grad_norm": 9.818670326151684,
|
|
"learning_rate": 1.2856394338260691e-06,
|
|
"loss": 0.4777085483074188,
|
|
"step": 4513
|
|
},
|
|
{
|
|
"epoch": 2.370798319327731,
|
|
"grad_norm": 8.044330388133467,
|
|
"learning_rate": 1.2835943233403448e-06,
|
|
"loss": 0.37345853447914124,
|
|
"step": 4514
|
|
},
|
|
{
|
|
"epoch": 2.3713235294117645,
|
|
"grad_norm": 8.44086601901999,
|
|
"learning_rate": 1.2815506011970874e-06,
|
|
"loss": 0.2127484381198883,
|
|
"step": 4515
|
|
},
|
|
{
|
|
"epoch": 2.3718487394957983,
|
|
"grad_norm": 7.6709255750210525,
|
|
"learning_rate": 1.2795082681597753e-06,
|
|
"loss": 0.5472914576530457,
|
|
"step": 4516
|
|
},
|
|
{
|
|
"epoch": 2.372373949579832,
|
|
"grad_norm": 14.223232908912985,
|
|
"learning_rate": 1.2774673249913656e-06,
|
|
"loss": 0.49846890568733215,
|
|
"step": 4517
|
|
},
|
|
{
|
|
"epoch": 2.3728991596638656,
|
|
"grad_norm": 9.019624708036671,
|
|
"learning_rate": 1.2754277724542945e-06,
|
|
"loss": 0.5183377265930176,
|
|
"step": 4518
|
|
},
|
|
{
|
|
"epoch": 2.373424369747899,
|
|
"grad_norm": 15.58524676012771,
|
|
"learning_rate": 1.273389611310481e-06,
|
|
"loss": 0.34386664628982544,
|
|
"step": 4519
|
|
},
|
|
{
|
|
"epoch": 2.3739495798319328,
|
|
"grad_norm": 14.45139815052486,
|
|
"learning_rate": 1.2713528423213235e-06,
|
|
"loss": 0.4957372546195984,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 2.3744747899159666,
|
|
"grad_norm": 12.608215426959852,
|
|
"learning_rate": 1.2693174662477003e-06,
|
|
"loss": 0.7115473747253418,
|
|
"step": 4521
|
|
},
|
|
{
|
|
"epoch": 2.375,
|
|
"grad_norm": 9.654962039546547,
|
|
"learning_rate": 1.2672834838499699e-06,
|
|
"loss": 0.2595762014389038,
|
|
"step": 4522
|
|
},
|
|
{
|
|
"epoch": 2.3755252100840334,
|
|
"grad_norm": 9.931027827541035,
|
|
"learning_rate": 1.2652508958879671e-06,
|
|
"loss": 0.34179964661598206,
|
|
"step": 4523
|
|
},
|
|
{
|
|
"epoch": 2.3760504201680672,
|
|
"grad_norm": 7.314600864411473,
|
|
"learning_rate": 1.263219703121013e-06,
|
|
"loss": 0.21406862139701843,
|
|
"step": 4524
|
|
},
|
|
{
|
|
"epoch": 2.376575630252101,
|
|
"grad_norm": 13.579760846219973,
|
|
"learning_rate": 1.2611899063079002e-06,
|
|
"loss": 0.3097609281539917,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 2.3771008403361344,
|
|
"grad_norm": 13.948578777970463,
|
|
"learning_rate": 1.259161506206903e-06,
|
|
"loss": 0.3892877399921417,
|
|
"step": 4526
|
|
},
|
|
{
|
|
"epoch": 2.377626050420168,
|
|
"grad_norm": 12.728954798673547,
|
|
"learning_rate": 1.257134503575773e-06,
|
|
"loss": 0.49235785007476807,
|
|
"step": 4527
|
|
},
|
|
{
|
|
"epoch": 2.3781512605042017,
|
|
"grad_norm": 9.33542032962396,
|
|
"learning_rate": 1.2551088991717409e-06,
|
|
"loss": 0.5548625588417053,
|
|
"step": 4528
|
|
},
|
|
{
|
|
"epoch": 2.3786764705882355,
|
|
"grad_norm": 14.105576588004832,
|
|
"learning_rate": 1.253084693751514e-06,
|
|
"loss": 0.32942134141921997,
|
|
"step": 4529
|
|
},
|
|
{
|
|
"epoch": 2.379201680672269,
|
|
"grad_norm": 15.890278297736177,
|
|
"learning_rate": 1.2510618880712755e-06,
|
|
"loss": 0.7341816425323486,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 2.3797268907563023,
|
|
"grad_norm": 14.638612778437102,
|
|
"learning_rate": 1.2490404828866914e-06,
|
|
"loss": 1.1487144231796265,
|
|
"step": 4531
|
|
},
|
|
{
|
|
"epoch": 2.380252100840336,
|
|
"grad_norm": 10.822677428034597,
|
|
"learning_rate": 1.2470204789528983e-06,
|
|
"loss": 0.27511245012283325,
|
|
"step": 4532
|
|
},
|
|
{
|
|
"epoch": 2.38077731092437,
|
|
"grad_norm": 10.529355342309076,
|
|
"learning_rate": 1.245001877024512e-06,
|
|
"loss": 0.5962315201759338,
|
|
"step": 4533
|
|
},
|
|
{
|
|
"epoch": 2.3813025210084033,
|
|
"grad_norm": 14.016859086216845,
|
|
"learning_rate": 1.2429846778556242e-06,
|
|
"loss": 0.5010058879852295,
|
|
"step": 4534
|
|
},
|
|
{
|
|
"epoch": 2.3818277310924367,
|
|
"grad_norm": 13.89031281850282,
|
|
"learning_rate": 1.2409688821998022e-06,
|
|
"loss": 0.5231708884239197,
|
|
"step": 4535
|
|
},
|
|
{
|
|
"epoch": 2.3823529411764706,
|
|
"grad_norm": 10.77777049343026,
|
|
"learning_rate": 1.2389544908100902e-06,
|
|
"loss": 0.617600679397583,
|
|
"step": 4536
|
|
},
|
|
{
|
|
"epoch": 2.3828781512605044,
|
|
"grad_norm": 7.056643902412701,
|
|
"learning_rate": 1.2369415044390055e-06,
|
|
"loss": 0.27999234199523926,
|
|
"step": 4537
|
|
},
|
|
{
|
|
"epoch": 2.383403361344538,
|
|
"grad_norm": 20.01932814320385,
|
|
"learning_rate": 1.2349299238385442e-06,
|
|
"loss": 0.8885968327522278,
|
|
"step": 4538
|
|
},
|
|
{
|
|
"epoch": 2.3839285714285716,
|
|
"grad_norm": 12.602010743357127,
|
|
"learning_rate": 1.2329197497601742e-06,
|
|
"loss": 0.41816869378089905,
|
|
"step": 4539
|
|
},
|
|
{
|
|
"epoch": 2.384453781512605,
|
|
"grad_norm": 10.61641237261424,
|
|
"learning_rate": 1.2309109829548382e-06,
|
|
"loss": 0.5384302139282227,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 2.384978991596639,
|
|
"grad_norm": 24.207108774787446,
|
|
"learning_rate": 1.228903624172954e-06,
|
|
"loss": 0.5781921148300171,
|
|
"step": 4541
|
|
},
|
|
{
|
|
"epoch": 2.3855042016806722,
|
|
"grad_norm": 13.753831129571772,
|
|
"learning_rate": 1.226897674164414e-06,
|
|
"loss": 0.3146056532859802,
|
|
"step": 4542
|
|
},
|
|
{
|
|
"epoch": 2.386029411764706,
|
|
"grad_norm": 21.067414772277196,
|
|
"learning_rate": 1.224893133678583e-06,
|
|
"loss": 0.46929800510406494,
|
|
"step": 4543
|
|
},
|
|
{
|
|
"epoch": 2.3865546218487395,
|
|
"grad_norm": 19.178368373358886,
|
|
"learning_rate": 1.2228900034642992e-06,
|
|
"loss": 0.4543416202068329,
|
|
"step": 4544
|
|
},
|
|
{
|
|
"epoch": 2.3870798319327733,
|
|
"grad_norm": 8.590587353420862,
|
|
"learning_rate": 1.220888284269874e-06,
|
|
"loss": 0.2502267360687256,
|
|
"step": 4545
|
|
},
|
|
{
|
|
"epoch": 2.3876050420168067,
|
|
"grad_norm": 8.267644715887172,
|
|
"learning_rate": 1.218887976843095e-06,
|
|
"loss": 0.403756707906723,
|
|
"step": 4546
|
|
},
|
|
{
|
|
"epoch": 2.3881302521008405,
|
|
"grad_norm": 27.172483857051382,
|
|
"learning_rate": 1.2168890819312173e-06,
|
|
"loss": 1.945648431777954,
|
|
"step": 4547
|
|
},
|
|
{
|
|
"epoch": 2.388655462184874,
|
|
"grad_norm": 11.871262692990832,
|
|
"learning_rate": 1.2148916002809719e-06,
|
|
"loss": 0.4905507266521454,
|
|
"step": 4548
|
|
},
|
|
{
|
|
"epoch": 2.3891806722689077,
|
|
"grad_norm": 4.4824523843108945,
|
|
"learning_rate": 1.2128955326385595e-06,
|
|
"loss": 0.2541502118110657,
|
|
"step": 4549
|
|
},
|
|
{
|
|
"epoch": 2.389705882352941,
|
|
"grad_norm": 8.654776147893612,
|
|
"learning_rate": 1.210900879749654e-06,
|
|
"loss": 0.37291058897972107,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 2.390231092436975,
|
|
"grad_norm": 8.544907980266823,
|
|
"learning_rate": 1.208907642359401e-06,
|
|
"loss": 0.19303686916828156,
|
|
"step": 4551
|
|
},
|
|
{
|
|
"epoch": 2.3907563025210083,
|
|
"grad_norm": 9.245516836008381,
|
|
"learning_rate": 1.2069158212124148e-06,
|
|
"loss": 0.36492592096328735,
|
|
"step": 4552
|
|
},
|
|
{
|
|
"epoch": 2.391281512605042,
|
|
"grad_norm": 11.486559035082562,
|
|
"learning_rate": 1.2049254170527857e-06,
|
|
"loss": 0.5990746021270752,
|
|
"step": 4553
|
|
},
|
|
{
|
|
"epoch": 2.3918067226890756,
|
|
"grad_norm": 10.642291366806228,
|
|
"learning_rate": 1.2029364306240703e-06,
|
|
"loss": 0.34983572363853455,
|
|
"step": 4554
|
|
},
|
|
{
|
|
"epoch": 2.3923319327731094,
|
|
"grad_norm": 14.700654848807085,
|
|
"learning_rate": 1.200948862669296e-06,
|
|
"loss": 0.3403421640396118,
|
|
"step": 4555
|
|
},
|
|
{
|
|
"epoch": 2.392857142857143,
|
|
"grad_norm": 10.654595923642477,
|
|
"learning_rate": 1.1989627139309617e-06,
|
|
"loss": 0.3918571472167969,
|
|
"step": 4556
|
|
},
|
|
{
|
|
"epoch": 2.3933823529411766,
|
|
"grad_norm": 10.624715948046202,
|
|
"learning_rate": 1.196977985151036e-06,
|
|
"loss": 0.6162440776824951,
|
|
"step": 4557
|
|
},
|
|
{
|
|
"epoch": 2.39390756302521,
|
|
"grad_norm": 10.207329269807822,
|
|
"learning_rate": 1.1949946770709558e-06,
|
|
"loss": 0.32177025079727173,
|
|
"step": 4558
|
|
},
|
|
{
|
|
"epoch": 2.394432773109244,
|
|
"grad_norm": 12.335230457637099,
|
|
"learning_rate": 1.1930127904316286e-06,
|
|
"loss": 1.0117385387420654,
|
|
"step": 4559
|
|
},
|
|
{
|
|
"epoch": 2.3949579831932772,
|
|
"grad_norm": 8.243611243776725,
|
|
"learning_rate": 1.1910323259734286e-06,
|
|
"loss": 0.35751795768737793,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 2.395483193277311,
|
|
"grad_norm": 8.818503755815009,
|
|
"learning_rate": 1.1890532844362035e-06,
|
|
"loss": 0.3382008671760559,
|
|
"step": 4561
|
|
},
|
|
{
|
|
"epoch": 2.3960084033613445,
|
|
"grad_norm": 11.812357760685854,
|
|
"learning_rate": 1.1870756665592648e-06,
|
|
"loss": 0.3039571940898895,
|
|
"step": 4562
|
|
},
|
|
{
|
|
"epoch": 2.3965336134453783,
|
|
"grad_norm": 8.267801937979563,
|
|
"learning_rate": 1.1850994730813937e-06,
|
|
"loss": 0.5322685837745667,
|
|
"step": 4563
|
|
},
|
|
{
|
|
"epoch": 2.3970588235294117,
|
|
"grad_norm": 11.86240748917353,
|
|
"learning_rate": 1.1831247047408396e-06,
|
|
"loss": 0.6107098460197449,
|
|
"step": 4564
|
|
},
|
|
{
|
|
"epoch": 2.3975840336134455,
|
|
"grad_norm": 9.478992360920858,
|
|
"learning_rate": 1.1811513622753196e-06,
|
|
"loss": 0.3338160812854767,
|
|
"step": 4565
|
|
},
|
|
{
|
|
"epoch": 2.398109243697479,
|
|
"grad_norm": 16.787931283767268,
|
|
"learning_rate": 1.1791794464220169e-06,
|
|
"loss": 0.7742102742195129,
|
|
"step": 4566
|
|
},
|
|
{
|
|
"epoch": 2.3986344537815127,
|
|
"grad_norm": 7.128087325207843,
|
|
"learning_rate": 1.1772089579175816e-06,
|
|
"loss": 0.2856733798980713,
|
|
"step": 4567
|
|
},
|
|
{
|
|
"epoch": 2.399159663865546,
|
|
"grad_norm": 10.84834945215509,
|
|
"learning_rate": 1.175239897498135e-06,
|
|
"loss": 1.2364510297775269,
|
|
"step": 4568
|
|
},
|
|
{
|
|
"epoch": 2.39968487394958,
|
|
"grad_norm": 14.669079873648037,
|
|
"learning_rate": 1.1732722658992597e-06,
|
|
"loss": 0.47522950172424316,
|
|
"step": 4569
|
|
},
|
|
{
|
|
"epoch": 2.4002100840336134,
|
|
"grad_norm": 10.01277105949357,
|
|
"learning_rate": 1.171306063856006e-06,
|
|
"loss": 0.9380142688751221,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 2.400735294117647,
|
|
"grad_norm": 8.870561127927482,
|
|
"learning_rate": 1.1693412921028913e-06,
|
|
"loss": 0.34390684962272644,
|
|
"step": 4571
|
|
},
|
|
{
|
|
"epoch": 2.4012605042016806,
|
|
"grad_norm": 8.102666776910846,
|
|
"learning_rate": 1.167377951373897e-06,
|
|
"loss": 0.3068695664405823,
|
|
"step": 4572
|
|
},
|
|
{
|
|
"epoch": 2.4017857142857144,
|
|
"grad_norm": 11.733510445020583,
|
|
"learning_rate": 1.1654160424024718e-06,
|
|
"loss": 0.3494885563850403,
|
|
"step": 4573
|
|
},
|
|
{
|
|
"epoch": 2.402310924369748,
|
|
"grad_norm": 9.119536859268015,
|
|
"learning_rate": 1.1634555659215268e-06,
|
|
"loss": 0.4633587598800659,
|
|
"step": 4574
|
|
},
|
|
{
|
|
"epoch": 2.4028361344537816,
|
|
"grad_norm": 11.788540750458916,
|
|
"learning_rate": 1.1614965226634423e-06,
|
|
"loss": 0.3058873414993286,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 2.403361344537815,
|
|
"grad_norm": 8.634211487623894,
|
|
"learning_rate": 1.1595389133600594e-06,
|
|
"loss": 0.3870164155960083,
|
|
"step": 4576
|
|
},
|
|
{
|
|
"epoch": 2.403886554621849,
|
|
"grad_norm": 13.009369084404737,
|
|
"learning_rate": 1.1575827387426846e-06,
|
|
"loss": 0.619822084903717,
|
|
"step": 4577
|
|
},
|
|
{
|
|
"epoch": 2.4044117647058822,
|
|
"grad_norm": 7.596472587253989,
|
|
"learning_rate": 1.1556279995420888e-06,
|
|
"loss": 0.35728439688682556,
|
|
"step": 4578
|
|
},
|
|
{
|
|
"epoch": 2.404936974789916,
|
|
"grad_norm": 8.100305393677553,
|
|
"learning_rate": 1.1536746964885065e-06,
|
|
"loss": 0.3426452875137329,
|
|
"step": 4579
|
|
},
|
|
{
|
|
"epoch": 2.4054621848739495,
|
|
"grad_norm": 10.260781417810852,
|
|
"learning_rate": 1.1517228303116356e-06,
|
|
"loss": 0.28849175572395325,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 2.4059873949579833,
|
|
"grad_norm": 10.602548146202897,
|
|
"learning_rate": 1.149772401740637e-06,
|
|
"loss": 0.3924471139907837,
|
|
"step": 4581
|
|
},
|
|
{
|
|
"epoch": 2.4065126050420167,
|
|
"grad_norm": 8.276072794012705,
|
|
"learning_rate": 1.1478234115041332e-06,
|
|
"loss": 0.3332470655441284,
|
|
"step": 4582
|
|
},
|
|
{
|
|
"epoch": 2.4070378151260505,
|
|
"grad_norm": 13.10012865056916,
|
|
"learning_rate": 1.1458758603302145e-06,
|
|
"loss": 0.7749272584915161,
|
|
"step": 4583
|
|
},
|
|
{
|
|
"epoch": 2.407563025210084,
|
|
"grad_norm": 7.66078655500123,
|
|
"learning_rate": 1.1439297489464285e-06,
|
|
"loss": 0.6607871055603027,
|
|
"step": 4584
|
|
},
|
|
{
|
|
"epoch": 2.4080882352941178,
|
|
"grad_norm": 20.841498297644005,
|
|
"learning_rate": 1.1419850780797864e-06,
|
|
"loss": 0.9104949235916138,
|
|
"step": 4585
|
|
},
|
|
{
|
|
"epoch": 2.408613445378151,
|
|
"grad_norm": 24.58967149440676,
|
|
"learning_rate": 1.1400418484567615e-06,
|
|
"loss": 0.4393938481807709,
|
|
"step": 4586
|
|
},
|
|
{
|
|
"epoch": 2.409138655462185,
|
|
"grad_norm": 19.30629632993457,
|
|
"learning_rate": 1.1381000608032883e-06,
|
|
"loss": 0.5510975122451782,
|
|
"step": 4587
|
|
},
|
|
{
|
|
"epoch": 2.4096638655462184,
|
|
"grad_norm": 9.060418499495135,
|
|
"learning_rate": 1.1361597158447634e-06,
|
|
"loss": 0.6518533229827881,
|
|
"step": 4588
|
|
},
|
|
{
|
|
"epoch": 2.410189075630252,
|
|
"grad_norm": 11.550628576725419,
|
|
"learning_rate": 1.1342208143060423e-06,
|
|
"loss": 0.3659060001373291,
|
|
"step": 4589
|
|
},
|
|
{
|
|
"epoch": 2.4107142857142856,
|
|
"grad_norm": 10.536082007500719,
|
|
"learning_rate": 1.1322833569114461e-06,
|
|
"loss": 0.6478267312049866,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 2.4112394957983194,
|
|
"grad_norm": 10.03293638787172,
|
|
"learning_rate": 1.1303473443847507e-06,
|
|
"loss": 0.6590147018432617,
|
|
"step": 4591
|
|
},
|
|
{
|
|
"epoch": 2.411764705882353,
|
|
"grad_norm": 8.406678997050797,
|
|
"learning_rate": 1.1284127774491965e-06,
|
|
"loss": 0.23670372366905212,
|
|
"step": 4592
|
|
},
|
|
{
|
|
"epoch": 2.4122899159663866,
|
|
"grad_norm": 11.042492722263283,
|
|
"learning_rate": 1.1264796568274811e-06,
|
|
"loss": 0.46212291717529297,
|
|
"step": 4593
|
|
},
|
|
{
|
|
"epoch": 2.41281512605042,
|
|
"grad_norm": 8.492544898311726,
|
|
"learning_rate": 1.1245479832417628e-06,
|
|
"loss": 0.7500928044319153,
|
|
"step": 4594
|
|
},
|
|
{
|
|
"epoch": 2.413340336134454,
|
|
"grad_norm": 51.10833416309785,
|
|
"learning_rate": 1.1226177574136598e-06,
|
|
"loss": 0.5376368165016174,
|
|
"step": 4595
|
|
},
|
|
{
|
|
"epoch": 2.4138655462184873,
|
|
"grad_norm": 15.47020436339206,
|
|
"learning_rate": 1.120688980064249e-06,
|
|
"loss": 0.9305119514465332,
|
|
"step": 4596
|
|
},
|
|
{
|
|
"epoch": 2.414390756302521,
|
|
"grad_norm": 12.071511895735837,
|
|
"learning_rate": 1.118761651914065e-06,
|
|
"loss": 0.3205832839012146,
|
|
"step": 4597
|
|
},
|
|
{
|
|
"epoch": 2.4149159663865545,
|
|
"grad_norm": 11.168123193679575,
|
|
"learning_rate": 1.1168357736831042e-06,
|
|
"loss": 0.3182224929332733,
|
|
"step": 4598
|
|
},
|
|
{
|
|
"epoch": 2.4154411764705883,
|
|
"grad_norm": 10.301001919075295,
|
|
"learning_rate": 1.114911346090819e-06,
|
|
"loss": 0.24900300800800323,
|
|
"step": 4599
|
|
},
|
|
{
|
|
"epoch": 2.4159663865546217,
|
|
"grad_norm": 10.871850965335218,
|
|
"learning_rate": 1.11298836985612e-06,
|
|
"loss": 0.5075720548629761,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 2.4164915966386555,
|
|
"grad_norm": 14.646679590428588,
|
|
"learning_rate": 1.1110668456973761e-06,
|
|
"loss": 0.4689010679721832,
|
|
"step": 4601
|
|
},
|
|
{
|
|
"epoch": 2.417016806722689,
|
|
"grad_norm": 23.360825741897084,
|
|
"learning_rate": 1.109146774332413e-06,
|
|
"loss": 0.6588277220726013,
|
|
"step": 4602
|
|
},
|
|
{
|
|
"epoch": 2.4175420168067228,
|
|
"grad_norm": 23.480203977740334,
|
|
"learning_rate": 1.1072281564785148e-06,
|
|
"loss": 0.6603044271469116,
|
|
"step": 4603
|
|
},
|
|
{
|
|
"epoch": 2.418067226890756,
|
|
"grad_norm": 13.877326349427491,
|
|
"learning_rate": 1.1053109928524208e-06,
|
|
"loss": 0.37659794092178345,
|
|
"step": 4604
|
|
},
|
|
{
|
|
"epoch": 2.41859243697479,
|
|
"grad_norm": 7.630531509101829,
|
|
"learning_rate": 1.10339528417033e-06,
|
|
"loss": 0.6668751239776611,
|
|
"step": 4605
|
|
},
|
|
{
|
|
"epoch": 2.4191176470588234,
|
|
"grad_norm": 9.484671306100047,
|
|
"learning_rate": 1.1014810311478956e-06,
|
|
"loss": 0.6734082102775574,
|
|
"step": 4606
|
|
},
|
|
{
|
|
"epoch": 2.419642857142857,
|
|
"grad_norm": 10.68041361148424,
|
|
"learning_rate": 1.0995682345002278e-06,
|
|
"loss": 0.45807453989982605,
|
|
"step": 4607
|
|
},
|
|
{
|
|
"epoch": 2.4201680672268906,
|
|
"grad_norm": 7.363900299142912,
|
|
"learning_rate": 1.097656894941892e-06,
|
|
"loss": 0.6644171476364136,
|
|
"step": 4608
|
|
},
|
|
{
|
|
"epoch": 2.4206932773109244,
|
|
"grad_norm": 9.935514576410666,
|
|
"learning_rate": 1.0957470131869102e-06,
|
|
"loss": 0.5023245215415955,
|
|
"step": 4609
|
|
},
|
|
{
|
|
"epoch": 2.421218487394958,
|
|
"grad_norm": 11.434976183372724,
|
|
"learning_rate": 1.0938385899487592e-06,
|
|
"loss": 0.47515398263931274,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 2.4217436974789917,
|
|
"grad_norm": 8.137973873825795,
|
|
"learning_rate": 1.0919316259403695e-06,
|
|
"loss": 0.6053848266601562,
|
|
"step": 4611
|
|
},
|
|
{
|
|
"epoch": 2.422268907563025,
|
|
"grad_norm": 15.478126748003993,
|
|
"learning_rate": 1.090026121874131e-06,
|
|
"loss": 0.8274378776550293,
|
|
"step": 4612
|
|
},
|
|
{
|
|
"epoch": 2.422794117647059,
|
|
"grad_norm": 8.113753229832033,
|
|
"learning_rate": 1.088122078461884e-06,
|
|
"loss": 0.2030077576637268,
|
|
"step": 4613
|
|
},
|
|
{
|
|
"epoch": 2.4233193277310923,
|
|
"grad_norm": 12.815663910863112,
|
|
"learning_rate": 1.0862194964149247e-06,
|
|
"loss": 0.6265841722488403,
|
|
"step": 4614
|
|
},
|
|
{
|
|
"epoch": 2.423844537815126,
|
|
"grad_norm": 15.273531269907533,
|
|
"learning_rate": 1.084318376444003e-06,
|
|
"loss": 0.186864972114563,
|
|
"step": 4615
|
|
},
|
|
{
|
|
"epoch": 2.4243697478991595,
|
|
"grad_norm": 7.960351905070217,
|
|
"learning_rate": 1.0824187192593221e-06,
|
|
"loss": 0.21888574957847595,
|
|
"step": 4616
|
|
},
|
|
{
|
|
"epoch": 2.4248949579831933,
|
|
"grad_norm": 14.13373694399719,
|
|
"learning_rate": 1.0805205255705403e-06,
|
|
"loss": 0.6205488443374634,
|
|
"step": 4617
|
|
},
|
|
{
|
|
"epoch": 2.4254201680672267,
|
|
"grad_norm": 9.753636436422939,
|
|
"learning_rate": 1.078623796086768e-06,
|
|
"loss": 0.3561756908893585,
|
|
"step": 4618
|
|
},
|
|
{
|
|
"epoch": 2.4259453781512605,
|
|
"grad_norm": 8.614605845692175,
|
|
"learning_rate": 1.0767285315165675e-06,
|
|
"loss": 0.7992842793464661,
|
|
"step": 4619
|
|
},
|
|
{
|
|
"epoch": 2.426470588235294,
|
|
"grad_norm": 5.079532214651388,
|
|
"learning_rate": 1.0748347325679575e-06,
|
|
"loss": 0.3378203809261322,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 2.4269957983193278,
|
|
"grad_norm": 12.258978016996133,
|
|
"learning_rate": 1.0729423999484062e-06,
|
|
"loss": 0.6549593210220337,
|
|
"step": 4621
|
|
},
|
|
{
|
|
"epoch": 2.427521008403361,
|
|
"grad_norm": 11.15933276488124,
|
|
"learning_rate": 1.0710515343648348e-06,
|
|
"loss": 0.6643284559249878,
|
|
"step": 4622
|
|
},
|
|
{
|
|
"epoch": 2.428046218487395,
|
|
"grad_norm": 11.934831310505624,
|
|
"learning_rate": 1.0691621365236154e-06,
|
|
"loss": 0.758434534072876,
|
|
"step": 4623
|
|
},
|
|
{
|
|
"epoch": 2.4285714285714284,
|
|
"grad_norm": 11.236140098617149,
|
|
"learning_rate": 1.0672742071305736e-06,
|
|
"loss": 0.23000317811965942,
|
|
"step": 4624
|
|
},
|
|
{
|
|
"epoch": 2.429096638655462,
|
|
"grad_norm": 10.18273884662997,
|
|
"learning_rate": 1.0653877468909857e-06,
|
|
"loss": 0.43413665890693665,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 2.4296218487394956,
|
|
"grad_norm": 10.484440270034014,
|
|
"learning_rate": 1.0635027565095779e-06,
|
|
"loss": 0.7100927233695984,
|
|
"step": 4626
|
|
},
|
|
{
|
|
"epoch": 2.4301470588235294,
|
|
"grad_norm": 10.39787756370367,
|
|
"learning_rate": 1.0616192366905303e-06,
|
|
"loss": 0.5624114274978638,
|
|
"step": 4627
|
|
},
|
|
{
|
|
"epoch": 2.4306722689075633,
|
|
"grad_norm": 10.210201466083713,
|
|
"learning_rate": 1.0597371881374713e-06,
|
|
"loss": 0.357845664024353,
|
|
"step": 4628
|
|
},
|
|
{
|
|
"epoch": 2.4311974789915967,
|
|
"grad_norm": 14.365351018327207,
|
|
"learning_rate": 1.0578566115534794e-06,
|
|
"loss": 0.5785903930664062,
|
|
"step": 4629
|
|
},
|
|
{
|
|
"epoch": 2.43172268907563,
|
|
"grad_norm": 10.595979118429634,
|
|
"learning_rate": 1.055977507641085e-06,
|
|
"loss": 0.34922948479652405,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 2.432247899159664,
|
|
"grad_norm": 9.011612235377743,
|
|
"learning_rate": 1.054099877102267e-06,
|
|
"loss": 0.5151946544647217,
|
|
"step": 4631
|
|
},
|
|
{
|
|
"epoch": 2.4327731092436977,
|
|
"grad_norm": 6.991350232267147,
|
|
"learning_rate": 1.052223720638454e-06,
|
|
"loss": 0.16994990408420563,
|
|
"step": 4632
|
|
},
|
|
{
|
|
"epoch": 2.433298319327731,
|
|
"grad_norm": 8.942996413150123,
|
|
"learning_rate": 1.0503490389505244e-06,
|
|
"loss": 0.6988615989685059,
|
|
"step": 4633
|
|
},
|
|
{
|
|
"epoch": 2.4338235294117645,
|
|
"grad_norm": 12.219976719707963,
|
|
"learning_rate": 1.048475832738804e-06,
|
|
"loss": 0.9352719783782959,
|
|
"step": 4634
|
|
},
|
|
{
|
|
"epoch": 2.4343487394957983,
|
|
"grad_norm": 8.108446115529599,
|
|
"learning_rate": 1.0466041027030716e-06,
|
|
"loss": 0.416052907705307,
|
|
"step": 4635
|
|
},
|
|
{
|
|
"epoch": 2.434873949579832,
|
|
"grad_norm": 13.376560828922862,
|
|
"learning_rate": 1.0447338495425506e-06,
|
|
"loss": 0.7009848356246948,
|
|
"step": 4636
|
|
},
|
|
{
|
|
"epoch": 2.4353991596638656,
|
|
"grad_norm": 10.397124759629698,
|
|
"learning_rate": 1.0428650739559138e-06,
|
|
"loss": 0.3095715045928955,
|
|
"step": 4637
|
|
},
|
|
{
|
|
"epoch": 2.435924369747899,
|
|
"grad_norm": 12.509361131831966,
|
|
"learning_rate": 1.0409977766412821e-06,
|
|
"loss": 1.1042118072509766,
|
|
"step": 4638
|
|
},
|
|
{
|
|
"epoch": 2.4364495798319328,
|
|
"grad_norm": 11.823671042508225,
|
|
"learning_rate": 1.0391319582962244e-06,
|
|
"loss": 0.8671766519546509,
|
|
"step": 4639
|
|
},
|
|
{
|
|
"epoch": 2.4369747899159666,
|
|
"grad_norm": 16.64653003896913,
|
|
"learning_rate": 1.0372676196177561e-06,
|
|
"loss": 0.4441404342651367,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 2.4375,
|
|
"grad_norm": 13.246604756211456,
|
|
"learning_rate": 1.0354047613023404e-06,
|
|
"loss": 0.5973472595214844,
|
|
"step": 4641
|
|
},
|
|
{
|
|
"epoch": 2.4380252100840334,
|
|
"grad_norm": 10.513921651500217,
|
|
"learning_rate": 1.0335433840458892e-06,
|
|
"loss": 0.20096644759178162,
|
|
"step": 4642
|
|
},
|
|
{
|
|
"epoch": 2.4385504201680672,
|
|
"grad_norm": 10.223128915328267,
|
|
"learning_rate": 1.0316834885437594e-06,
|
|
"loss": 0.4479983448982239,
|
|
"step": 4643
|
|
},
|
|
{
|
|
"epoch": 2.439075630252101,
|
|
"grad_norm": 14.272482738733997,
|
|
"learning_rate": 1.0298250754907534e-06,
|
|
"loss": 0.44989848136901855,
|
|
"step": 4644
|
|
},
|
|
{
|
|
"epoch": 2.4396008403361344,
|
|
"grad_norm": 12.102580308462185,
|
|
"learning_rate": 1.0279681455811219e-06,
|
|
"loss": 0.4259611964225769,
|
|
"step": 4645
|
|
},
|
|
{
|
|
"epoch": 2.440126050420168,
|
|
"grad_norm": 10.869361108084085,
|
|
"learning_rate": 1.0261126995085607e-06,
|
|
"loss": 0.42129191756248474,
|
|
"step": 4646
|
|
},
|
|
{
|
|
"epoch": 2.4406512605042017,
|
|
"grad_norm": 15.94137067423729,
|
|
"learning_rate": 1.02425873796621e-06,
|
|
"loss": 0.410157710313797,
|
|
"step": 4647
|
|
},
|
|
{
|
|
"epoch": 2.4411764705882355,
|
|
"grad_norm": 9.645977618738604,
|
|
"learning_rate": 1.0224062616466562e-06,
|
|
"loss": 0.36366206407546997,
|
|
"step": 4648
|
|
},
|
|
{
|
|
"epoch": 2.441701680672269,
|
|
"grad_norm": 9.70861359971365,
|
|
"learning_rate": 1.0205552712419343e-06,
|
|
"loss": 0.4668600559234619,
|
|
"step": 4649
|
|
},
|
|
{
|
|
"epoch": 2.4422268907563023,
|
|
"grad_norm": 9.99325401999145,
|
|
"learning_rate": 1.018705767443519e-06,
|
|
"loss": 0.48056501150131226,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 2.442752100840336,
|
|
"grad_norm": 13.475799182568382,
|
|
"learning_rate": 1.0168577509423322e-06,
|
|
"loss": 0.30501866340637207,
|
|
"step": 4651
|
|
},
|
|
{
|
|
"epoch": 2.44327731092437,
|
|
"grad_norm": 11.649082911865747,
|
|
"learning_rate": 1.0150112224287396e-06,
|
|
"loss": 0.6185335516929626,
|
|
"step": 4652
|
|
},
|
|
{
|
|
"epoch": 2.4438025210084033,
|
|
"grad_norm": 16.70253684316815,
|
|
"learning_rate": 1.013166182592551e-06,
|
|
"loss": 0.724973201751709,
|
|
"step": 4653
|
|
},
|
|
{
|
|
"epoch": 2.4443277310924367,
|
|
"grad_norm": 17.50781231837,
|
|
"learning_rate": 1.0113226321230213e-06,
|
|
"loss": 0.5617965459823608,
|
|
"step": 4654
|
|
},
|
|
{
|
|
"epoch": 2.4448529411764706,
|
|
"grad_norm": 15.26581366504748,
|
|
"learning_rate": 1.0094805717088473e-06,
|
|
"loss": 0.5633823275566101,
|
|
"step": 4655
|
|
},
|
|
{
|
|
"epoch": 2.4453781512605044,
|
|
"grad_norm": 10.232663292730718,
|
|
"learning_rate": 1.0076400020381687e-06,
|
|
"loss": 0.42383772134780884,
|
|
"step": 4656
|
|
},
|
|
{
|
|
"epoch": 2.445903361344538,
|
|
"grad_norm": 14.80627293281165,
|
|
"learning_rate": 1.0058009237985721e-06,
|
|
"loss": 0.8045518398284912,
|
|
"step": 4657
|
|
},
|
|
{
|
|
"epoch": 2.4464285714285716,
|
|
"grad_norm": 12.875540222576225,
|
|
"learning_rate": 1.003963337677083e-06,
|
|
"loss": 1.4751150608062744,
|
|
"step": 4658
|
|
},
|
|
{
|
|
"epoch": 2.446953781512605,
|
|
"grad_norm": 9.835555950015056,
|
|
"learning_rate": 1.002127244360171e-06,
|
|
"loss": 0.2266070544719696,
|
|
"step": 4659
|
|
},
|
|
{
|
|
"epoch": 2.447478991596639,
|
|
"grad_norm": 16.775725560965732,
|
|
"learning_rate": 1.0002926445337474e-06,
|
|
"loss": 2.585972309112549,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 2.4480042016806722,
|
|
"grad_norm": 17.680236712991608,
|
|
"learning_rate": 9.98459538883167e-07,
|
|
"loss": 0.5047086477279663,
|
|
"step": 4661
|
|
},
|
|
{
|
|
"epoch": 2.448529411764706,
|
|
"grad_norm": 9.336729580011761,
|
|
"learning_rate": 9.96627928093224e-07,
|
|
"loss": 0.25490984320640564,
|
|
"step": 4662
|
|
},
|
|
{
|
|
"epoch": 2.4490546218487395,
|
|
"grad_norm": 16.411493477279496,
|
|
"learning_rate": 9.947978128481556e-07,
|
|
"loss": 0.4267747402191162,
|
|
"step": 4663
|
|
},
|
|
{
|
|
"epoch": 2.4495798319327733,
|
|
"grad_norm": 12.203016330801969,
|
|
"learning_rate": 9.929691938316422e-07,
|
|
"loss": 0.36980509757995605,
|
|
"step": 4664
|
|
},
|
|
{
|
|
"epoch": 2.4501050420168067,
|
|
"grad_norm": 16.308788493374234,
|
|
"learning_rate": 9.911420717268023e-07,
|
|
"loss": 0.5081444978713989,
|
|
"step": 4665
|
|
},
|
|
{
|
|
"epoch": 2.4506302521008405,
|
|
"grad_norm": 12.769936615698294,
|
|
"learning_rate": 9.893164472161965e-07,
|
|
"loss": 0.24696582555770874,
|
|
"step": 4666
|
|
},
|
|
{
|
|
"epoch": 2.451155462184874,
|
|
"grad_norm": 9.790135986927812,
|
|
"learning_rate": 9.874923209818249e-07,
|
|
"loss": 0.7831677198410034,
|
|
"step": 4667
|
|
},
|
|
{
|
|
"epoch": 2.4516806722689077,
|
|
"grad_norm": 11.856238608913674,
|
|
"learning_rate": 9.856696937051297e-07,
|
|
"loss": 0.6640298962593079,
|
|
"step": 4668
|
|
},
|
|
{
|
|
"epoch": 2.452205882352941,
|
|
"grad_norm": 9.527089820470042,
|
|
"learning_rate": 9.838485660669906e-07,
|
|
"loss": 0.6059166193008423,
|
|
"step": 4669
|
|
},
|
|
{
|
|
"epoch": 2.452731092436975,
|
|
"grad_norm": 21.9886168050128,
|
|
"learning_rate": 9.820289387477304e-07,
|
|
"loss": 0.6838579177856445,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 2.4532563025210083,
|
|
"grad_norm": 24.78888622836168,
|
|
"learning_rate": 9.802108124271092e-07,
|
|
"loss": 3.7656304836273193,
|
|
"step": 4671
|
|
},
|
|
{
|
|
"epoch": 2.453781512605042,
|
|
"grad_norm": 6.48566338430173,
|
|
"learning_rate": 9.78394187784325e-07,
|
|
"loss": 0.2731790244579315,
|
|
"step": 4672
|
|
},
|
|
{
|
|
"epoch": 2.4543067226890756,
|
|
"grad_norm": 14.024939105084325,
|
|
"learning_rate": 9.765790654980195e-07,
|
|
"loss": 0.49420320987701416,
|
|
"step": 4673
|
|
},
|
|
{
|
|
"epoch": 2.4548319327731094,
|
|
"grad_norm": 10.56214526304676,
|
|
"learning_rate": 9.74765446246269e-07,
|
|
"loss": 0.33996111154556274,
|
|
"step": 4674
|
|
},
|
|
{
|
|
"epoch": 2.455357142857143,
|
|
"grad_norm": 10.87291795043671,
|
|
"learning_rate": 9.729533307065897e-07,
|
|
"loss": 0.22930221259593964,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 2.4558823529411766,
|
|
"grad_norm": 5.957944172714362,
|
|
"learning_rate": 9.711427195559352e-07,
|
|
"loss": 0.2003369927406311,
|
|
"step": 4676
|
|
},
|
|
{
|
|
"epoch": 2.45640756302521,
|
|
"grad_norm": 15.022562983405647,
|
|
"learning_rate": 9.693336134706988e-07,
|
|
"loss": 0.36549103260040283,
|
|
"step": 4677
|
|
},
|
|
{
|
|
"epoch": 2.456932773109244,
|
|
"grad_norm": 15.985269360078055,
|
|
"learning_rate": 9.6752601312671e-07,
|
|
"loss": 0.6158819794654846,
|
|
"step": 4678
|
|
},
|
|
{
|
|
"epoch": 2.4574579831932772,
|
|
"grad_norm": 7.715153626258572,
|
|
"learning_rate": 9.65719919199235e-07,
|
|
"loss": 0.3991345167160034,
|
|
"step": 4679
|
|
},
|
|
{
|
|
"epoch": 2.457983193277311,
|
|
"grad_norm": 9.644010906168198,
|
|
"learning_rate": 9.63915332362982e-07,
|
|
"loss": 0.6521626710891724,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 2.4585084033613445,
|
|
"grad_norm": 12.418945035394104,
|
|
"learning_rate": 9.621122532920908e-07,
|
|
"loss": 0.5674232244491577,
|
|
"step": 4681
|
|
},
|
|
{
|
|
"epoch": 2.4590336134453783,
|
|
"grad_norm": 5.721560434142794,
|
|
"learning_rate": 9.6031068266014e-07,
|
|
"loss": 0.1428273469209671,
|
|
"step": 4682
|
|
},
|
|
{
|
|
"epoch": 2.4595588235294117,
|
|
"grad_norm": 17.981790555131614,
|
|
"learning_rate": 9.58510621140145e-07,
|
|
"loss": 0.3296002447605133,
|
|
"step": 4683
|
|
},
|
|
{
|
|
"epoch": 2.4600840336134455,
|
|
"grad_norm": 10.881012831291098,
|
|
"learning_rate": 9.567120694045568e-07,
|
|
"loss": 0.8027752041816711,
|
|
"step": 4684
|
|
},
|
|
{
|
|
"epoch": 2.460609243697479,
|
|
"grad_norm": 10.925923426486813,
|
|
"learning_rate": 9.549150281252633e-07,
|
|
"loss": 0.36758917570114136,
|
|
"step": 4685
|
|
},
|
|
{
|
|
"epoch": 2.4611344537815127,
|
|
"grad_norm": 8.777357272041694,
|
|
"learning_rate": 9.531194979735869e-07,
|
|
"loss": 0.9515275955200195,
|
|
"step": 4686
|
|
},
|
|
{
|
|
"epoch": 2.461659663865546,
|
|
"grad_norm": 11.242025681773345,
|
|
"learning_rate": 9.513254796202848e-07,
|
|
"loss": 0.6229841113090515,
|
|
"step": 4687
|
|
},
|
|
{
|
|
"epoch": 2.46218487394958,
|
|
"grad_norm": 8.82602710321931,
|
|
"learning_rate": 9.495329737355541e-07,
|
|
"loss": 0.2540172040462494,
|
|
"step": 4688
|
|
},
|
|
{
|
|
"epoch": 2.4627100840336134,
|
|
"grad_norm": 11.3910438721506,
|
|
"learning_rate": 9.477419809890215e-07,
|
|
"loss": 1.8517810106277466,
|
|
"step": 4689
|
|
},
|
|
{
|
|
"epoch": 2.463235294117647,
|
|
"grad_norm": 14.344553984642637,
|
|
"learning_rate": 9.459525020497507e-07,
|
|
"loss": 0.6088428497314453,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 2.4637605042016806,
|
|
"grad_norm": 24.59858337612728,
|
|
"learning_rate": 9.441645375862397e-07,
|
|
"loss": 0.7089042067527771,
|
|
"step": 4691
|
|
},
|
|
{
|
|
"epoch": 2.4642857142857144,
|
|
"grad_norm": 16.790495896313505,
|
|
"learning_rate": 9.423780882664202e-07,
|
|
"loss": 0.7045350074768066,
|
|
"step": 4692
|
|
},
|
|
{
|
|
"epoch": 2.464810924369748,
|
|
"grad_norm": 13.776657877349882,
|
|
"learning_rate": 9.405931547576591e-07,
|
|
"loss": 0.41512563824653625,
|
|
"step": 4693
|
|
},
|
|
{
|
|
"epoch": 2.4653361344537816,
|
|
"grad_norm": 11.554234737040302,
|
|
"learning_rate": 9.388097377267552e-07,
|
|
"loss": 0.996214747428894,
|
|
"step": 4694
|
|
},
|
|
{
|
|
"epoch": 2.465861344537815,
|
|
"grad_norm": 12.658984217175727,
|
|
"learning_rate": 9.370278378399433e-07,
|
|
"loss": 0.47079768776893616,
|
|
"step": 4695
|
|
},
|
|
{
|
|
"epoch": 2.466386554621849,
|
|
"grad_norm": 15.816315389778362,
|
|
"learning_rate": 9.352474557628899e-07,
|
|
"loss": 0.4400932788848877,
|
|
"step": 4696
|
|
},
|
|
{
|
|
"epoch": 2.4669117647058822,
|
|
"grad_norm": 8.718914840431788,
|
|
"learning_rate": 9.334685921606946e-07,
|
|
"loss": 0.19140273332595825,
|
|
"step": 4697
|
|
},
|
|
{
|
|
"epoch": 2.467436974789916,
|
|
"grad_norm": 7.9039013943502505,
|
|
"learning_rate": 9.31691247697889e-07,
|
|
"loss": 0.26404517889022827,
|
|
"step": 4698
|
|
},
|
|
{
|
|
"epoch": 2.4679621848739495,
|
|
"grad_norm": 10.2417256646224,
|
|
"learning_rate": 9.299154230384383e-07,
|
|
"loss": 0.6127966642379761,
|
|
"step": 4699
|
|
},
|
|
{
|
|
"epoch": 2.4684873949579833,
|
|
"grad_norm": 9.496690714163332,
|
|
"learning_rate": 9.281411188457396e-07,
|
|
"loss": 2.1094913482666016,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 2.4690126050420167,
|
|
"grad_norm": 13.347013677572749,
|
|
"learning_rate": 9.26368335782622e-07,
|
|
"loss": 0.4380970597267151,
|
|
"step": 4701
|
|
},
|
|
{
|
|
"epoch": 2.4695378151260505,
|
|
"grad_norm": 14.36156356847337,
|
|
"learning_rate": 9.245970745113453e-07,
|
|
"loss": 0.6515285968780518,
|
|
"step": 4702
|
|
},
|
|
{
|
|
"epoch": 2.470063025210084,
|
|
"grad_norm": 11.80035507754474,
|
|
"learning_rate": 9.228273356936046e-07,
|
|
"loss": 0.3813164532184601,
|
|
"step": 4703
|
|
},
|
|
{
|
|
"epoch": 2.4705882352941178,
|
|
"grad_norm": 8.528257081660707,
|
|
"learning_rate": 9.210591199905217e-07,
|
|
"loss": 0.6726269721984863,
|
|
"step": 4704
|
|
},
|
|
{
|
|
"epoch": 2.471113445378151,
|
|
"grad_norm": 10.551066183916396,
|
|
"learning_rate": 9.192924280626514e-07,
|
|
"loss": 0.560356855392456,
|
|
"step": 4705
|
|
},
|
|
{
|
|
"epoch": 2.471638655462185,
|
|
"grad_norm": 11.015979250413581,
|
|
"learning_rate": 9.175272605699792e-07,
|
|
"loss": 0.35912248492240906,
|
|
"step": 4706
|
|
},
|
|
{
|
|
"epoch": 2.4721638655462184,
|
|
"grad_norm": 9.392218324754262,
|
|
"learning_rate": 9.157636181719204e-07,
|
|
"loss": 0.1710374355316162,
|
|
"step": 4707
|
|
},
|
|
{
|
|
"epoch": 2.472689075630252,
|
|
"grad_norm": 13.355515376393848,
|
|
"learning_rate": 9.140015015273224e-07,
|
|
"loss": 0.3761917054653168,
|
|
"step": 4708
|
|
},
|
|
{
|
|
"epoch": 2.4732142857142856,
|
|
"grad_norm": 10.90637067923259,
|
|
"learning_rate": 9.122409112944591e-07,
|
|
"loss": 0.31881874799728394,
|
|
"step": 4709
|
|
},
|
|
{
|
|
"epoch": 2.4737394957983194,
|
|
"grad_norm": 8.854564926807791,
|
|
"learning_rate": 9.104818481310396e-07,
|
|
"loss": 0.43804216384887695,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 2.474264705882353,
|
|
"grad_norm": 9.147833087460095,
|
|
"learning_rate": 9.087243126941975e-07,
|
|
"loss": 0.6836268305778503,
|
|
"step": 4711
|
|
},
|
|
{
|
|
"epoch": 2.4747899159663866,
|
|
"grad_norm": 19.132813414443866,
|
|
"learning_rate": 9.069683056404982e-07,
|
|
"loss": 0.33152320981025696,
|
|
"step": 4712
|
|
},
|
|
{
|
|
"epoch": 2.47531512605042,
|
|
"grad_norm": 14.268196793419088,
|
|
"learning_rate": 9.052138276259348e-07,
|
|
"loss": 0.33739525079727173,
|
|
"step": 4713
|
|
},
|
|
{
|
|
"epoch": 2.475840336134454,
|
|
"grad_norm": 14.092363531663707,
|
|
"learning_rate": 9.034608793059307e-07,
|
|
"loss": 0.32641756534576416,
|
|
"step": 4714
|
|
},
|
|
{
|
|
"epoch": 2.4763655462184873,
|
|
"grad_norm": 9.068358384788377,
|
|
"learning_rate": 9.017094613353366e-07,
|
|
"loss": 0.27681398391723633,
|
|
"step": 4715
|
|
},
|
|
{
|
|
"epoch": 2.476890756302521,
|
|
"grad_norm": 10.4349516296856,
|
|
"learning_rate": 8.999595743684314e-07,
|
|
"loss": 0.349325567483902,
|
|
"step": 4716
|
|
},
|
|
{
|
|
"epoch": 2.4774159663865545,
|
|
"grad_norm": 13.80555283378645,
|
|
"learning_rate": 8.982112190589237e-07,
|
|
"loss": 0.5981140732765198,
|
|
"step": 4717
|
|
},
|
|
{
|
|
"epoch": 2.4779411764705883,
|
|
"grad_norm": 13.102668083052532,
|
|
"learning_rate": 8.96464396059949e-07,
|
|
"loss": 0.5057382583618164,
|
|
"step": 4718
|
|
},
|
|
{
|
|
"epoch": 2.4784663865546217,
|
|
"grad_norm": 9.959965509538167,
|
|
"learning_rate": 8.947191060240701e-07,
|
|
"loss": 0.26920151710510254,
|
|
"step": 4719
|
|
},
|
|
{
|
|
"epoch": 2.4789915966386555,
|
|
"grad_norm": 19.063605641074684,
|
|
"learning_rate": 8.929753496032761e-07,
|
|
"loss": 0.8154730200767517,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 2.479516806722689,
|
|
"grad_norm": 13.689453077945961,
|
|
"learning_rate": 8.912331274489855e-07,
|
|
"loss": 0.3454706072807312,
|
|
"step": 4721
|
|
},
|
|
{
|
|
"epoch": 2.4800420168067228,
|
|
"grad_norm": 10.441706029517952,
|
|
"learning_rate": 8.894924402120425e-07,
|
|
"loss": 0.3592435419559479,
|
|
"step": 4722
|
|
},
|
|
{
|
|
"epoch": 2.480567226890756,
|
|
"grad_norm": 20.216135523518552,
|
|
"learning_rate": 8.87753288542717e-07,
|
|
"loss": 0.3239816427230835,
|
|
"step": 4723
|
|
},
|
|
{
|
|
"epoch": 2.48109243697479,
|
|
"grad_norm": 15.57789120946543,
|
|
"learning_rate": 8.860156730907054e-07,
|
|
"loss": 0.6587352752685547,
|
|
"step": 4724
|
|
},
|
|
{
|
|
"epoch": 2.4816176470588234,
|
|
"grad_norm": 8.587661716318681,
|
|
"learning_rate": 8.842795945051335e-07,
|
|
"loss": 0.16333778202533722,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 2.482142857142857,
|
|
"grad_norm": 12.397698419058147,
|
|
"learning_rate": 8.825450534345486e-07,
|
|
"loss": 0.5055996179580688,
|
|
"step": 4726
|
|
},
|
|
{
|
|
"epoch": 2.4826680672268906,
|
|
"grad_norm": 16.795151339073612,
|
|
"learning_rate": 8.808120505269269e-07,
|
|
"loss": 0.31784749031066895,
|
|
"step": 4727
|
|
},
|
|
{
|
|
"epoch": 2.4831932773109244,
|
|
"grad_norm": 14.69910847418944,
|
|
"learning_rate": 8.79080586429667e-07,
|
|
"loss": 0.6859865188598633,
|
|
"step": 4728
|
|
},
|
|
{
|
|
"epoch": 2.483718487394958,
|
|
"grad_norm": 10.10533952596627,
|
|
"learning_rate": 8.773506617895944e-07,
|
|
"loss": 0.3855650722980499,
|
|
"step": 4729
|
|
},
|
|
{
|
|
"epoch": 2.4842436974789917,
|
|
"grad_norm": 11.80557983473714,
|
|
"learning_rate": 8.756222772529599e-07,
|
|
"loss": 0.4115293025970459,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 2.484768907563025,
|
|
"grad_norm": 13.927940063008188,
|
|
"learning_rate": 8.73895433465437e-07,
|
|
"loss": 0.5889797210693359,
|
|
"step": 4731
|
|
},
|
|
{
|
|
"epoch": 2.485294117647059,
|
|
"grad_norm": 9.563821181327375,
|
|
"learning_rate": 8.72170131072127e-07,
|
|
"loss": 0.4151526093482971,
|
|
"step": 4732
|
|
},
|
|
{
|
|
"epoch": 2.4858193277310923,
|
|
"grad_norm": 28.27792492837143,
|
|
"learning_rate": 8.704463707175526e-07,
|
|
"loss": 0.5376741886138916,
|
|
"step": 4733
|
|
},
|
|
{
|
|
"epoch": 2.486344537815126,
|
|
"grad_norm": 10.945468143772102,
|
|
"learning_rate": 8.687241530456608e-07,
|
|
"loss": 0.5140660405158997,
|
|
"step": 4734
|
|
},
|
|
{
|
|
"epoch": 2.4868697478991595,
|
|
"grad_norm": 11.624318257674368,
|
|
"learning_rate": 8.670034786998232e-07,
|
|
"loss": 0.8436710834503174,
|
|
"step": 4735
|
|
},
|
|
{
|
|
"epoch": 2.4873949579831933,
|
|
"grad_norm": 13.02003886638922,
|
|
"learning_rate": 8.652843483228335e-07,
|
|
"loss": 0.7538946866989136,
|
|
"step": 4736
|
|
},
|
|
{
|
|
"epoch": 2.4879201680672267,
|
|
"grad_norm": 17.921355315899405,
|
|
"learning_rate": 8.6356676255691e-07,
|
|
"loss": 0.349669873714447,
|
|
"step": 4737
|
|
},
|
|
{
|
|
"epoch": 2.4884453781512605,
|
|
"grad_norm": 13.878297019785771,
|
|
"learning_rate": 8.618507220436939e-07,
|
|
"loss": 0.5361471176147461,
|
|
"step": 4738
|
|
},
|
|
{
|
|
"epoch": 2.488970588235294,
|
|
"grad_norm": 12.16297921198656,
|
|
"learning_rate": 8.601362274242465e-07,
|
|
"loss": 0.4689074754714966,
|
|
"step": 4739
|
|
},
|
|
{
|
|
"epoch": 2.4894957983193278,
|
|
"grad_norm": 17.62693924380033,
|
|
"learning_rate": 8.584232793390562e-07,
|
|
"loss": 0.534205436706543,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 2.490021008403361,
|
|
"grad_norm": 8.706013583682777,
|
|
"learning_rate": 8.567118784280309e-07,
|
|
"loss": 0.5740219354629517,
|
|
"step": 4741
|
|
},
|
|
{
|
|
"epoch": 2.490546218487395,
|
|
"grad_norm": 20.246481701007742,
|
|
"learning_rate": 8.550020253305003e-07,
|
|
"loss": 0.7582848072052002,
|
|
"step": 4742
|
|
},
|
|
{
|
|
"epoch": 2.4910714285714284,
|
|
"grad_norm": 9.255095815609051,
|
|
"learning_rate": 8.532937206852165e-07,
|
|
"loss": 0.31694650650024414,
|
|
"step": 4743
|
|
},
|
|
{
|
|
"epoch": 2.491596638655462,
|
|
"grad_norm": 14.210622193219352,
|
|
"learning_rate": 8.515869651303533e-07,
|
|
"loss": 0.409004271030426,
|
|
"step": 4744
|
|
},
|
|
{
|
|
"epoch": 2.4921218487394956,
|
|
"grad_norm": 10.275172515676715,
|
|
"learning_rate": 8.498817593035053e-07,
|
|
"loss": 0.38191547989845276,
|
|
"step": 4745
|
|
},
|
|
{
|
|
"epoch": 2.4926470588235294,
|
|
"grad_norm": 15.07256195329395,
|
|
"learning_rate": 8.48178103841687e-07,
|
|
"loss": 0.47504180669784546,
|
|
"step": 4746
|
|
},
|
|
{
|
|
"epoch": 2.4931722689075633,
|
|
"grad_norm": 49.964165771216344,
|
|
"learning_rate": 8.464759993813382e-07,
|
|
"loss": 1.2656458616256714,
|
|
"step": 4747
|
|
},
|
|
{
|
|
"epoch": 2.4936974789915967,
|
|
"grad_norm": 12.222531563981194,
|
|
"learning_rate": 8.447754465583142e-07,
|
|
"loss": 0.5406888723373413,
|
|
"step": 4748
|
|
},
|
|
{
|
|
"epoch": 2.49422268907563,
|
|
"grad_norm": 11.470656056399244,
|
|
"learning_rate": 8.430764460078938e-07,
|
|
"loss": 0.7408872246742249,
|
|
"step": 4749
|
|
},
|
|
{
|
|
"epoch": 2.494747899159664,
|
|
"grad_norm": 12.942732280680922,
|
|
"learning_rate": 8.41378998364774e-07,
|
|
"loss": 0.8626704216003418,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 2.4952731092436977,
|
|
"grad_norm": 10.67096922730019,
|
|
"learning_rate": 8.396831042630727e-07,
|
|
"loss": 0.23665863275527954,
|
|
"step": 4751
|
|
},
|
|
{
|
|
"epoch": 2.495798319327731,
|
|
"grad_norm": 9.020429608119832,
|
|
"learning_rate": 8.379887643363277e-07,
|
|
"loss": 0.33638107776641846,
|
|
"step": 4752
|
|
},
|
|
{
|
|
"epoch": 2.4963235294117645,
|
|
"grad_norm": 13.825420485703267,
|
|
"learning_rate": 8.362959792174941e-07,
|
|
"loss": 0.33803611993789673,
|
|
"step": 4753
|
|
},
|
|
{
|
|
"epoch": 2.4968487394957983,
|
|
"grad_norm": 10.06604318894792,
|
|
"learning_rate": 8.346047495389498e-07,
|
|
"loss": 1.0515186786651611,
|
|
"step": 4754
|
|
},
|
|
{
|
|
"epoch": 2.497373949579832,
|
|
"grad_norm": 8.66381834651261,
|
|
"learning_rate": 8.329150759324894e-07,
|
|
"loss": 0.26907309889793396,
|
|
"step": 4755
|
|
},
|
|
{
|
|
"epoch": 2.4978991596638656,
|
|
"grad_norm": 8.36770765299475,
|
|
"learning_rate": 8.312269590293259e-07,
|
|
"loss": 0.456392765045166,
|
|
"step": 4756
|
|
},
|
|
{
|
|
"epoch": 2.498424369747899,
|
|
"grad_norm": 10.390199793457995,
|
|
"learning_rate": 8.295403994600921e-07,
|
|
"loss": 0.23822353780269623,
|
|
"step": 4757
|
|
},
|
|
{
|
|
"epoch": 2.4989495798319328,
|
|
"grad_norm": 11.533163558354298,
|
|
"learning_rate": 8.278553978548365e-07,
|
|
"loss": 0.3360922336578369,
|
|
"step": 4758
|
|
},
|
|
{
|
|
"epoch": 2.4994747899159666,
|
|
"grad_norm": 9.65091677840734,
|
|
"learning_rate": 8.261719548430292e-07,
|
|
"loss": 0.219955176115036,
|
|
"step": 4759
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 18.190299748999404,
|
|
"learning_rate": 8.244900710535547e-07,
|
|
"loss": 0.5958255529403687,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 2.5005252100840334,
|
|
"grad_norm": 7.997470071309525,
|
|
"learning_rate": 8.228097471147167e-07,
|
|
"loss": 0.3603939414024353,
|
|
"step": 4761
|
|
},
|
|
{
|
|
"epoch": 2.5010504201680672,
|
|
"grad_norm": 11.77676038041079,
|
|
"learning_rate": 8.211309836542369e-07,
|
|
"loss": 0.3790106177330017,
|
|
"step": 4762
|
|
},
|
|
{
|
|
"epoch": 2.501575630252101,
|
|
"grad_norm": 8.184737025733762,
|
|
"learning_rate": 8.194537812992531e-07,
|
|
"loss": 0.18647828698158264,
|
|
"step": 4763
|
|
},
|
|
{
|
|
"epoch": 2.5021008403361344,
|
|
"grad_norm": 9.537869020221029,
|
|
"learning_rate": 8.177781406763196e-07,
|
|
"loss": 0.9135514497756958,
|
|
"step": 4764
|
|
},
|
|
{
|
|
"epoch": 2.502626050420168,
|
|
"grad_norm": 8.44957865906863,
|
|
"learning_rate": 8.161040624114075e-07,
|
|
"loss": 0.5648715496063232,
|
|
"step": 4765
|
|
},
|
|
{
|
|
"epoch": 2.5031512605042017,
|
|
"grad_norm": 7.901263133771217,
|
|
"learning_rate": 8.144315471299046e-07,
|
|
"loss": 0.43349596858024597,
|
|
"step": 4766
|
|
},
|
|
{
|
|
"epoch": 2.5036764705882355,
|
|
"grad_norm": 9.546382607738664,
|
|
"learning_rate": 8.127605954566143e-07,
|
|
"loss": 0.3956523835659027,
|
|
"step": 4767
|
|
},
|
|
{
|
|
"epoch": 2.504201680672269,
|
|
"grad_norm": 10.824281803598327,
|
|
"learning_rate": 8.110912080157552e-07,
|
|
"loss": 0.44914332032203674,
|
|
"step": 4768
|
|
},
|
|
{
|
|
"epoch": 2.5047268907563023,
|
|
"grad_norm": 9.70877017119002,
|
|
"learning_rate": 8.094233854309647e-07,
|
|
"loss": 0.44429028034210205,
|
|
"step": 4769
|
|
},
|
|
{
|
|
"epoch": 2.505252100840336,
|
|
"grad_norm": 18.192437595582472,
|
|
"learning_rate": 8.077571283252928e-07,
|
|
"loss": 0.46342164278030396,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 2.50577731092437,
|
|
"grad_norm": 12.197374142546467,
|
|
"learning_rate": 8.060924373212042e-07,
|
|
"loss": 0.7917468547821045,
|
|
"step": 4771
|
|
},
|
|
{
|
|
"epoch": 2.5063025210084033,
|
|
"grad_norm": 11.550320342691949,
|
|
"learning_rate": 8.044293130405806e-07,
|
|
"loss": 0.3075546622276306,
|
|
"step": 4772
|
|
},
|
|
{
|
|
"epoch": 2.5068277310924367,
|
|
"grad_norm": 9.685331571603992,
|
|
"learning_rate": 8.027677561047176e-07,
|
|
"loss": 0.8057847619056702,
|
|
"step": 4773
|
|
},
|
|
{
|
|
"epoch": 2.5073529411764706,
|
|
"grad_norm": 10.380721309475254,
|
|
"learning_rate": 8.011077671343248e-07,
|
|
"loss": 0.7955812811851501,
|
|
"step": 4774
|
|
},
|
|
{
|
|
"epoch": 2.5078781512605044,
|
|
"grad_norm": 10.59970516497924,
|
|
"learning_rate": 7.994493467495262e-07,
|
|
"loss": 0.40353015065193176,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 2.508403361344538,
|
|
"grad_norm": 13.107800609546093,
|
|
"learning_rate": 7.977924955698591e-07,
|
|
"loss": 0.4272182285785675,
|
|
"step": 4776
|
|
},
|
|
{
|
|
"epoch": 2.508928571428571,
|
|
"grad_norm": 10.59584657769079,
|
|
"learning_rate": 7.961372142142776e-07,
|
|
"loss": 0.4582866430282593,
|
|
"step": 4777
|
|
},
|
|
{
|
|
"epoch": 2.509453781512605,
|
|
"grad_norm": 11.188308198926357,
|
|
"learning_rate": 7.944835033011472e-07,
|
|
"loss": 0.21376478672027588,
|
|
"step": 4778
|
|
},
|
|
{
|
|
"epoch": 2.509978991596639,
|
|
"grad_norm": 15.013372347267286,
|
|
"learning_rate": 7.928313634482454e-07,
|
|
"loss": 0.488264262676239,
|
|
"step": 4779
|
|
},
|
|
{
|
|
"epoch": 2.5105042016806722,
|
|
"grad_norm": 6.666361609200026,
|
|
"learning_rate": 7.911807952727652e-07,
|
|
"loss": 0.2537972331047058,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 2.5110294117647056,
|
|
"grad_norm": 12.349476818121895,
|
|
"learning_rate": 7.89531799391311e-07,
|
|
"loss": 0.5406059622764587,
|
|
"step": 4781
|
|
},
|
|
{
|
|
"epoch": 2.5115546218487395,
|
|
"grad_norm": 14.989702434119808,
|
|
"learning_rate": 7.878843764199007e-07,
|
|
"loss": 0.542559027671814,
|
|
"step": 4782
|
|
},
|
|
{
|
|
"epoch": 2.5120798319327733,
|
|
"grad_norm": 9.068316772517909,
|
|
"learning_rate": 7.862385269739625e-07,
|
|
"loss": 0.18501508235931396,
|
|
"step": 4783
|
|
},
|
|
{
|
|
"epoch": 2.5126050420168067,
|
|
"grad_norm": 7.601244891676653,
|
|
"learning_rate": 7.845942516683414e-07,
|
|
"loss": 0.4172128438949585,
|
|
"step": 4784
|
|
},
|
|
{
|
|
"epoch": 2.51313025210084,
|
|
"grad_norm": 14.302351113185587,
|
|
"learning_rate": 7.829515511172897e-07,
|
|
"loss": 0.5935714244842529,
|
|
"step": 4785
|
|
},
|
|
{
|
|
"epoch": 2.513655462184874,
|
|
"grad_norm": 8.467727121395807,
|
|
"learning_rate": 7.813104259344739e-07,
|
|
"loss": 0.3091030418872833,
|
|
"step": 4786
|
|
},
|
|
{
|
|
"epoch": 2.5141806722689077,
|
|
"grad_norm": 12.752753375998163,
|
|
"learning_rate": 7.796708767329708e-07,
|
|
"loss": 0.4248979687690735,
|
|
"step": 4787
|
|
},
|
|
{
|
|
"epoch": 2.514705882352941,
|
|
"grad_norm": 11.038479542489673,
|
|
"learning_rate": 7.780329041252688e-07,
|
|
"loss": 0.35864678025245667,
|
|
"step": 4788
|
|
},
|
|
{
|
|
"epoch": 2.5152310924369745,
|
|
"grad_norm": 11.241897403094827,
|
|
"learning_rate": 7.763965087232678e-07,
|
|
"loss": 0.34305691719055176,
|
|
"step": 4789
|
|
},
|
|
{
|
|
"epoch": 2.5157563025210083,
|
|
"grad_norm": 7.78712659878438,
|
|
"learning_rate": 7.747616911382766e-07,
|
|
"loss": 0.31905481219291687,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 2.516281512605042,
|
|
"grad_norm": 11.718760122762212,
|
|
"learning_rate": 7.73128451981019e-07,
|
|
"loss": 0.27002179622650146,
|
|
"step": 4791
|
|
},
|
|
{
|
|
"epoch": 2.5168067226890756,
|
|
"grad_norm": 13.498485736525947,
|
|
"learning_rate": 7.714967918616245e-07,
|
|
"loss": 0.3575587272644043,
|
|
"step": 4792
|
|
},
|
|
{
|
|
"epoch": 2.5173319327731094,
|
|
"grad_norm": 9.834120956463467,
|
|
"learning_rate": 7.698667113896346e-07,
|
|
"loss": 0.5128017663955688,
|
|
"step": 4793
|
|
},
|
|
{
|
|
"epoch": 2.517857142857143,
|
|
"grad_norm": 16.52662972061671,
|
|
"learning_rate": 7.682382111740011e-07,
|
|
"loss": 0.807076096534729,
|
|
"step": 4794
|
|
},
|
|
{
|
|
"epoch": 2.5183823529411766,
|
|
"grad_norm": 5.629052823987348,
|
|
"learning_rate": 7.666112918230839e-07,
|
|
"loss": 0.22683599591255188,
|
|
"step": 4795
|
|
},
|
|
{
|
|
"epoch": 2.51890756302521,
|
|
"grad_norm": 10.892963341447294,
|
|
"learning_rate": 7.649859539446547e-07,
|
|
"loss": 0.44364529848098755,
|
|
"step": 4796
|
|
},
|
|
{
|
|
"epoch": 2.519432773109244,
|
|
"grad_norm": 8.351755144819656,
|
|
"learning_rate": 7.633621981458916e-07,
|
|
"loss": 0.24138270318508148,
|
|
"step": 4797
|
|
},
|
|
{
|
|
"epoch": 2.5199579831932772,
|
|
"grad_norm": 7.268189185600107,
|
|
"learning_rate": 7.617400250333834e-07,
|
|
"loss": 0.24619129300117493,
|
|
"step": 4798
|
|
},
|
|
{
|
|
"epoch": 2.520483193277311,
|
|
"grad_norm": 8.801647696715333,
|
|
"learning_rate": 7.601194352131285e-07,
|
|
"loss": 0.2071070671081543,
|
|
"step": 4799
|
|
},
|
|
{
|
|
"epoch": 2.5210084033613445,
|
|
"grad_norm": 14.145085712514055,
|
|
"learning_rate": 7.585004292905329e-07,
|
|
"loss": 0.33978474140167236,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 2.5215336134453783,
|
|
"grad_norm": 11.188695002667309,
|
|
"learning_rate": 7.568830078704092e-07,
|
|
"loss": 0.7669999003410339,
|
|
"step": 4801
|
|
},
|
|
{
|
|
"epoch": 2.5220588235294117,
|
|
"grad_norm": 11.274157481480405,
|
|
"learning_rate": 7.552671715569809e-07,
|
|
"loss": 0.23734520375728607,
|
|
"step": 4802
|
|
},
|
|
{
|
|
"epoch": 2.5225840336134455,
|
|
"grad_norm": 10.725553642871082,
|
|
"learning_rate": 7.536529209538773e-07,
|
|
"loss": 0.40044504404067993,
|
|
"step": 4803
|
|
},
|
|
{
|
|
"epoch": 2.523109243697479,
|
|
"grad_norm": 14.974230115543515,
|
|
"learning_rate": 7.520402566641366e-07,
|
|
"loss": 0.3545833230018616,
|
|
"step": 4804
|
|
},
|
|
{
|
|
"epoch": 2.5236344537815127,
|
|
"grad_norm": 11.605260579557688,
|
|
"learning_rate": 7.504291792902024e-07,
|
|
"loss": 0.6216102838516235,
|
|
"step": 4805
|
|
},
|
|
{
|
|
"epoch": 2.524159663865546,
|
|
"grad_norm": 10.105806232277027,
|
|
"learning_rate": 7.488196894339289e-07,
|
|
"loss": 0.19713947176933289,
|
|
"step": 4806
|
|
},
|
|
{
|
|
"epoch": 2.52468487394958,
|
|
"grad_norm": 9.830120449234002,
|
|
"learning_rate": 7.472117876965751e-07,
|
|
"loss": 0.6729844212532043,
|
|
"step": 4807
|
|
},
|
|
{
|
|
"epoch": 2.5252100840336134,
|
|
"grad_norm": 9.67114193153479,
|
|
"learning_rate": 7.45605474678806e-07,
|
|
"loss": 0.7624964714050293,
|
|
"step": 4808
|
|
},
|
|
{
|
|
"epoch": 2.525735294117647,
|
|
"grad_norm": 9.966141114744058,
|
|
"learning_rate": 7.440007509806946e-07,
|
|
"loss": 0.37602072954177856,
|
|
"step": 4809
|
|
},
|
|
{
|
|
"epoch": 2.5262605042016806,
|
|
"grad_norm": 11.326456009743,
|
|
"learning_rate": 7.423976172017194e-07,
|
|
"loss": 0.24186234176158905,
|
|
"step": 4810
|
|
},
|
|
{
|
|
"epoch": 2.5267857142857144,
|
|
"grad_norm": 11.8197693174477,
|
|
"learning_rate": 7.407960739407649e-07,
|
|
"loss": 0.6827012300491333,
|
|
"step": 4811
|
|
},
|
|
{
|
|
"epoch": 2.527310924369748,
|
|
"grad_norm": 13.644422459868707,
|
|
"learning_rate": 7.391961217961224e-07,
|
|
"loss": 0.5761939287185669,
|
|
"step": 4812
|
|
},
|
|
{
|
|
"epoch": 2.5278361344537816,
|
|
"grad_norm": 15.510772845535444,
|
|
"learning_rate": 7.375977613654861e-07,
|
|
"loss": 0.46874308586120605,
|
|
"step": 4813
|
|
},
|
|
{
|
|
"epoch": 2.528361344537815,
|
|
"grad_norm": 9.707397970383074,
|
|
"learning_rate": 7.360009932459605e-07,
|
|
"loss": 0.40307551622390747,
|
|
"step": 4814
|
|
},
|
|
{
|
|
"epoch": 2.528886554621849,
|
|
"grad_norm": 7.586768252041111,
|
|
"learning_rate": 7.344058180340513e-07,
|
|
"loss": 0.4715978503227234,
|
|
"step": 4815
|
|
},
|
|
{
|
|
"epoch": 2.5294117647058822,
|
|
"grad_norm": 10.679268564030183,
|
|
"learning_rate": 7.328122363256696e-07,
|
|
"loss": 0.3112912178039551,
|
|
"step": 4816
|
|
},
|
|
{
|
|
"epoch": 2.529936974789916,
|
|
"grad_norm": 8.615670193104895,
|
|
"learning_rate": 7.312202487161318e-07,
|
|
"loss": 0.6268161535263062,
|
|
"step": 4817
|
|
},
|
|
{
|
|
"epoch": 2.5304621848739495,
|
|
"grad_norm": 11.014823921525975,
|
|
"learning_rate": 7.296298558001592e-07,
|
|
"loss": 0.3747507929801941,
|
|
"step": 4818
|
|
},
|
|
{
|
|
"epoch": 2.5309873949579833,
|
|
"grad_norm": 7.760951885638209,
|
|
"learning_rate": 7.280410581718761e-07,
|
|
"loss": 0.5421075224876404,
|
|
"step": 4819
|
|
},
|
|
{
|
|
"epoch": 2.5315126050420167,
|
|
"grad_norm": 17.31951317624361,
|
|
"learning_rate": 7.26453856424812e-07,
|
|
"loss": 0.7568320631980896,
|
|
"step": 4820
|
|
},
|
|
{
|
|
"epoch": 2.5320378151260505,
|
|
"grad_norm": 13.174117229431252,
|
|
"learning_rate": 7.248682511519006e-07,
|
|
"loss": 0.8093594312667847,
|
|
"step": 4821
|
|
},
|
|
{
|
|
"epoch": 2.532563025210084,
|
|
"grad_norm": 12.17679758920096,
|
|
"learning_rate": 7.232842429454784e-07,
|
|
"loss": 0.7586328983306885,
|
|
"step": 4822
|
|
},
|
|
{
|
|
"epoch": 2.5330882352941178,
|
|
"grad_norm": 13.971461301282277,
|
|
"learning_rate": 7.217018323972852e-07,
|
|
"loss": 0.6551073789596558,
|
|
"step": 4823
|
|
},
|
|
{
|
|
"epoch": 2.533613445378151,
|
|
"grad_norm": 11.482307681061377,
|
|
"learning_rate": 7.201210200984643e-07,
|
|
"loss": 0.27159246802330017,
|
|
"step": 4824
|
|
},
|
|
{
|
|
"epoch": 2.534138655462185,
|
|
"grad_norm": 8.20542700556153,
|
|
"learning_rate": 7.18541806639561e-07,
|
|
"loss": 0.2602250277996063,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 2.5346638655462184,
|
|
"grad_norm": 12.269919628277533,
|
|
"learning_rate": 7.169641926105247e-07,
|
|
"loss": 0.7117254734039307,
|
|
"step": 4826
|
|
},
|
|
{
|
|
"epoch": 2.535189075630252,
|
|
"grad_norm": 15.909872331796288,
|
|
"learning_rate": 7.153881786007056e-07,
|
|
"loss": 0.7022569179534912,
|
|
"step": 4827
|
|
},
|
|
{
|
|
"epoch": 2.5357142857142856,
|
|
"grad_norm": 8.44994351786321,
|
|
"learning_rate": 7.138137651988597e-07,
|
|
"loss": 0.6579830646514893,
|
|
"step": 4828
|
|
},
|
|
{
|
|
"epoch": 2.5362394957983194,
|
|
"grad_norm": 10.151957727346831,
|
|
"learning_rate": 7.122409529931412e-07,
|
|
"loss": 0.8653113842010498,
|
|
"step": 4829
|
|
},
|
|
{
|
|
"epoch": 2.536764705882353,
|
|
"grad_norm": 15.90754446592996,
|
|
"learning_rate": 7.106697425711062e-07,
|
|
"loss": 0.5251384973526001,
|
|
"step": 4830
|
|
},
|
|
{
|
|
"epoch": 2.5372899159663866,
|
|
"grad_norm": 12.169849091145256,
|
|
"learning_rate": 7.091001345197168e-07,
|
|
"loss": 0.6808485984802246,
|
|
"step": 4831
|
|
},
|
|
{
|
|
"epoch": 2.53781512605042,
|
|
"grad_norm": 11.718251401864599,
|
|
"learning_rate": 7.075321294253324e-07,
|
|
"loss": 0.29910916090011597,
|
|
"step": 4832
|
|
},
|
|
{
|
|
"epoch": 2.538340336134454,
|
|
"grad_norm": 16.413172899149846,
|
|
"learning_rate": 7.059657278737136e-07,
|
|
"loss": 0.6642720103263855,
|
|
"step": 4833
|
|
},
|
|
{
|
|
"epoch": 2.5388655462184873,
|
|
"grad_norm": 11.06091649328897,
|
|
"learning_rate": 7.044009304500238e-07,
|
|
"loss": 0.48181262612342834,
|
|
"step": 4834
|
|
},
|
|
{
|
|
"epoch": 2.539390756302521,
|
|
"grad_norm": 13.909343184916338,
|
|
"learning_rate": 7.028377377388262e-07,
|
|
"loss": 0.48922231793403625,
|
|
"step": 4835
|
|
},
|
|
{
|
|
"epoch": 2.5399159663865545,
|
|
"grad_norm": 14.748736598087799,
|
|
"learning_rate": 7.012761503240845e-07,
|
|
"loss": 0.25614088773727417,
|
|
"step": 4836
|
|
},
|
|
{
|
|
"epoch": 2.5404411764705883,
|
|
"grad_norm": 9.388733308532037,
|
|
"learning_rate": 6.997161687891635e-07,
|
|
"loss": 0.24819070100784302,
|
|
"step": 4837
|
|
},
|
|
{
|
|
"epoch": 2.5409663865546217,
|
|
"grad_norm": 15.797221194363438,
|
|
"learning_rate": 6.981577937168277e-07,
|
|
"loss": 0.5666882991790771,
|
|
"step": 4838
|
|
},
|
|
{
|
|
"epoch": 2.5414915966386555,
|
|
"grad_norm": 16.159389042946902,
|
|
"learning_rate": 6.966010256892408e-07,
|
|
"loss": 0.47369974851608276,
|
|
"step": 4839
|
|
},
|
|
{
|
|
"epoch": 2.542016806722689,
|
|
"grad_norm": 9.014616031246243,
|
|
"learning_rate": 6.950458652879671e-07,
|
|
"loss": 0.48810073733329773,
|
|
"step": 4840
|
|
},
|
|
{
|
|
"epoch": 2.5425420168067228,
|
|
"grad_norm": 7.811677899440476,
|
|
"learning_rate": 6.934923130939692e-07,
|
|
"loss": 0.22124749422073364,
|
|
"step": 4841
|
|
},
|
|
{
|
|
"epoch": 2.543067226890756,
|
|
"grad_norm": 20.703581400231325,
|
|
"learning_rate": 6.919403696876098e-07,
|
|
"loss": 0.6604146957397461,
|
|
"step": 4842
|
|
},
|
|
{
|
|
"epoch": 2.54359243697479,
|
|
"grad_norm": 10.027770271529535,
|
|
"learning_rate": 6.903900356486504e-07,
|
|
"loss": 0.527151346206665,
|
|
"step": 4843
|
|
},
|
|
{
|
|
"epoch": 2.5441176470588234,
|
|
"grad_norm": 16.503592775774724,
|
|
"learning_rate": 6.888413115562504e-07,
|
|
"loss": 0.5747373700141907,
|
|
"step": 4844
|
|
},
|
|
{
|
|
"epoch": 2.544642857142857,
|
|
"grad_norm": 17.534450929473888,
|
|
"learning_rate": 6.872941979889708e-07,
|
|
"loss": 0.8626971244812012,
|
|
"step": 4845
|
|
},
|
|
{
|
|
"epoch": 2.5451680672268906,
|
|
"grad_norm": 8.60113631480442,
|
|
"learning_rate": 6.857486955247677e-07,
|
|
"loss": 0.18687665462493896,
|
|
"step": 4846
|
|
},
|
|
{
|
|
"epoch": 2.5456932773109244,
|
|
"grad_norm": 12.47739944839357,
|
|
"learning_rate": 6.842048047409966e-07,
|
|
"loss": 0.629452645778656,
|
|
"step": 4847
|
|
},
|
|
{
|
|
"epoch": 2.546218487394958,
|
|
"grad_norm": 15.422739930466173,
|
|
"learning_rate": 6.826625262144105e-07,
|
|
"loss": 0.4687202572822571,
|
|
"step": 4848
|
|
},
|
|
{
|
|
"epoch": 2.5467436974789917,
|
|
"grad_norm": 10.402910406926852,
|
|
"learning_rate": 6.811218605211606e-07,
|
|
"loss": 0.28766074776649475,
|
|
"step": 4849
|
|
},
|
|
{
|
|
"epoch": 2.5472689075630255,
|
|
"grad_norm": 14.811406208320356,
|
|
"learning_rate": 6.79582808236795e-07,
|
|
"loss": 0.391085684299469,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 2.547794117647059,
|
|
"grad_norm": 9.893069366097043,
|
|
"learning_rate": 6.780453699362583e-07,
|
|
"loss": 0.4918053448200226,
|
|
"step": 4851
|
|
},
|
|
{
|
|
"epoch": 2.5483193277310923,
|
|
"grad_norm": 10.627153526672627,
|
|
"learning_rate": 6.765095461938964e-07,
|
|
"loss": 0.9855205416679382,
|
|
"step": 4852
|
|
},
|
|
{
|
|
"epoch": 2.548844537815126,
|
|
"grad_norm": 13.978483999394332,
|
|
"learning_rate": 6.749753375834467e-07,
|
|
"loss": 0.6464021801948547,
|
|
"step": 4853
|
|
},
|
|
{
|
|
"epoch": 2.54936974789916,
|
|
"grad_norm": 10.065154229744966,
|
|
"learning_rate": 6.734427446780467e-07,
|
|
"loss": 0.32371342182159424,
|
|
"step": 4854
|
|
},
|
|
{
|
|
"epoch": 2.5498949579831933,
|
|
"grad_norm": 14.54852078626233,
|
|
"learning_rate": 6.71911768050228e-07,
|
|
"loss": 1.1138784885406494,
|
|
"step": 4855
|
|
},
|
|
{
|
|
"epoch": 2.5504201680672267,
|
|
"grad_norm": 14.099701785370709,
|
|
"learning_rate": 6.703824082719201e-07,
|
|
"loss": 0.5352606177330017,
|
|
"step": 4856
|
|
},
|
|
{
|
|
"epoch": 2.5509453781512605,
|
|
"grad_norm": 12.263050013668199,
|
|
"learning_rate": 6.688546659144479e-07,
|
|
"loss": 0.36302947998046875,
|
|
"step": 4857
|
|
},
|
|
{
|
|
"epoch": 2.5514705882352944,
|
|
"grad_norm": 13.406656503641392,
|
|
"learning_rate": 6.67328541548532e-07,
|
|
"loss": 0.37487393617630005,
|
|
"step": 4858
|
|
},
|
|
{
|
|
"epoch": 2.5519957983193278,
|
|
"grad_norm": 10.510114460032613,
|
|
"learning_rate": 6.658040357442874e-07,
|
|
"loss": 1.5453276634216309,
|
|
"step": 4859
|
|
},
|
|
{
|
|
"epoch": 2.552521008403361,
|
|
"grad_norm": 16.962523456060687,
|
|
"learning_rate": 6.642811490712281e-07,
|
|
"loss": 0.5763338804244995,
|
|
"step": 4860
|
|
},
|
|
{
|
|
"epoch": 2.553046218487395,
|
|
"grad_norm": 14.6903233682848,
|
|
"learning_rate": 6.627598820982595e-07,
|
|
"loss": 0.4196200966835022,
|
|
"step": 4861
|
|
},
|
|
{
|
|
"epoch": 2.553571428571429,
|
|
"grad_norm": 11.346449668829871,
|
|
"learning_rate": 6.612402353936836e-07,
|
|
"loss": 0.5094459652900696,
|
|
"step": 4862
|
|
},
|
|
{
|
|
"epoch": 2.554096638655462,
|
|
"grad_norm": 16.336665587478024,
|
|
"learning_rate": 6.597222095251965e-07,
|
|
"loss": 0.5358494520187378,
|
|
"step": 4863
|
|
},
|
|
{
|
|
"epoch": 2.5546218487394956,
|
|
"grad_norm": 11.334003773741667,
|
|
"learning_rate": 6.582058050598894e-07,
|
|
"loss": 0.6448275446891785,
|
|
"step": 4864
|
|
},
|
|
{
|
|
"epoch": 2.5551470588235294,
|
|
"grad_norm": 10.359794671028979,
|
|
"learning_rate": 6.566910225642475e-07,
|
|
"loss": 0.33972978591918945,
|
|
"step": 4865
|
|
},
|
|
{
|
|
"epoch": 2.5556722689075633,
|
|
"grad_norm": 11.868815376042482,
|
|
"learning_rate": 6.551778626041483e-07,
|
|
"loss": 0.24651628732681274,
|
|
"step": 4866
|
|
},
|
|
{
|
|
"epoch": 2.5561974789915967,
|
|
"grad_norm": 12.24077182388493,
|
|
"learning_rate": 6.536663257448678e-07,
|
|
"loss": 0.5210819840431213,
|
|
"step": 4867
|
|
},
|
|
{
|
|
"epoch": 2.55672268907563,
|
|
"grad_norm": 9.652755066256226,
|
|
"learning_rate": 6.52156412551071e-07,
|
|
"loss": 0.5718560218811035,
|
|
"step": 4868
|
|
},
|
|
{
|
|
"epoch": 2.557247899159664,
|
|
"grad_norm": 11.699170284774768,
|
|
"learning_rate": 6.50648123586819e-07,
|
|
"loss": 0.4137555956840515,
|
|
"step": 4869
|
|
},
|
|
{
|
|
"epoch": 2.5577731092436977,
|
|
"grad_norm": 10.862464511081688,
|
|
"learning_rate": 6.491414594155648e-07,
|
|
"loss": 0.32669249176979065,
|
|
"step": 4870
|
|
},
|
|
{
|
|
"epoch": 2.558298319327731,
|
|
"grad_norm": 8.243241190545326,
|
|
"learning_rate": 6.476364206001545e-07,
|
|
"loss": 0.15914994478225708,
|
|
"step": 4871
|
|
},
|
|
{
|
|
"epoch": 2.5588235294117645,
|
|
"grad_norm": 8.97745382811696,
|
|
"learning_rate": 6.461330077028283e-07,
|
|
"loss": 0.7450168132781982,
|
|
"step": 4872
|
|
},
|
|
{
|
|
"epoch": 2.5593487394957983,
|
|
"grad_norm": 10.807797586108821,
|
|
"learning_rate": 6.446312212852162e-07,
|
|
"loss": 0.3181978464126587,
|
|
"step": 4873
|
|
},
|
|
{
|
|
"epoch": 2.559873949579832,
|
|
"grad_norm": 10.772107006863616,
|
|
"learning_rate": 6.431310619083453e-07,
|
|
"loss": 0.5689761638641357,
|
|
"step": 4874
|
|
},
|
|
{
|
|
"epoch": 2.5603991596638656,
|
|
"grad_norm": 21.676249425336703,
|
|
"learning_rate": 6.416325301326304e-07,
|
|
"loss": 2.277855157852173,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 2.560924369747899,
|
|
"grad_norm": 7.9307616325273536,
|
|
"learning_rate": 6.401356265178798e-07,
|
|
"loss": 0.29816046357154846,
|
|
"step": 4876
|
|
},
|
|
{
|
|
"epoch": 2.5614495798319328,
|
|
"grad_norm": 10.364708876046468,
|
|
"learning_rate": 6.386403516232948e-07,
|
|
"loss": 0.29614073038101196,
|
|
"step": 4877
|
|
},
|
|
{
|
|
"epoch": 2.5619747899159666,
|
|
"grad_norm": 9.06537771630169,
|
|
"learning_rate": 6.37146706007466e-07,
|
|
"loss": 0.7089647054672241,
|
|
"step": 4878
|
|
},
|
|
{
|
|
"epoch": 2.5625,
|
|
"grad_norm": 12.997617937172018,
|
|
"learning_rate": 6.356546902283772e-07,
|
|
"loss": 1.478808879852295,
|
|
"step": 4879
|
|
},
|
|
{
|
|
"epoch": 2.5630252100840334,
|
|
"grad_norm": 10.727525301484945,
|
|
"learning_rate": 6.341643048434027e-07,
|
|
"loss": 0.3855453431606293,
|
|
"step": 4880
|
|
},
|
|
{
|
|
"epoch": 2.5635504201680672,
|
|
"grad_norm": 16.869347084069474,
|
|
"learning_rate": 6.326755504093063e-07,
|
|
"loss": 0.7136096954345703,
|
|
"step": 4881
|
|
},
|
|
{
|
|
"epoch": 2.564075630252101,
|
|
"grad_norm": 11.201752979927996,
|
|
"learning_rate": 6.311884274822461e-07,
|
|
"loss": 0.5269986987113953,
|
|
"step": 4882
|
|
},
|
|
{
|
|
"epoch": 2.5646008403361344,
|
|
"grad_norm": 8.516308383367065,
|
|
"learning_rate": 6.29702936617767e-07,
|
|
"loss": 0.18743552267551422,
|
|
"step": 4883
|
|
},
|
|
{
|
|
"epoch": 2.565126050420168,
|
|
"grad_norm": 11.388982319970065,
|
|
"learning_rate": 6.28219078370807e-07,
|
|
"loss": 0.33473318815231323,
|
|
"step": 4884
|
|
},
|
|
{
|
|
"epoch": 2.5656512605042017,
|
|
"grad_norm": 9.306414957595914,
|
|
"learning_rate": 6.267368532956919e-07,
|
|
"loss": 0.30755195021629333,
|
|
"step": 4885
|
|
},
|
|
{
|
|
"epoch": 2.5661764705882355,
|
|
"grad_norm": 22.493330060283913,
|
|
"learning_rate": 6.252562619461389e-07,
|
|
"loss": 1.0576213598251343,
|
|
"step": 4886
|
|
},
|
|
{
|
|
"epoch": 2.566701680672269,
|
|
"grad_norm": 10.813401210980702,
|
|
"learning_rate": 6.237773048752538e-07,
|
|
"loss": 0.3524538278579712,
|
|
"step": 4887
|
|
},
|
|
{
|
|
"epoch": 2.5672268907563023,
|
|
"grad_norm": 11.788023301145108,
|
|
"learning_rate": 6.222999826355325e-07,
|
|
"loss": 0.26946529746055603,
|
|
"step": 4888
|
|
},
|
|
{
|
|
"epoch": 2.567752100840336,
|
|
"grad_norm": 8.763911909070023,
|
|
"learning_rate": 6.208242957788613e-07,
|
|
"loss": 0.2788327932357788,
|
|
"step": 4889
|
|
},
|
|
{
|
|
"epoch": 2.56827731092437,
|
|
"grad_norm": 10.845340501061996,
|
|
"learning_rate": 6.193502448565142e-07,
|
|
"loss": 0.48048681020736694,
|
|
"step": 4890
|
|
},
|
|
{
|
|
"epoch": 2.5688025210084033,
|
|
"grad_norm": 8.02975276473133,
|
|
"learning_rate": 6.178778304191535e-07,
|
|
"loss": 0.36470428109169006,
|
|
"step": 4891
|
|
},
|
|
{
|
|
"epoch": 2.5693277310924367,
|
|
"grad_norm": 11.462410820599452,
|
|
"learning_rate": 6.164070530168315e-07,
|
|
"loss": 0.3302750885486603,
|
|
"step": 4892
|
|
},
|
|
{
|
|
"epoch": 2.5698529411764706,
|
|
"grad_norm": 17.55556566514967,
|
|
"learning_rate": 6.14937913198988e-07,
|
|
"loss": 0.3937831521034241,
|
|
"step": 4893
|
|
},
|
|
{
|
|
"epoch": 2.5703781512605044,
|
|
"grad_norm": 9.263585463703778,
|
|
"learning_rate": 6.134704115144519e-07,
|
|
"loss": 0.3886506259441376,
|
|
"step": 4894
|
|
},
|
|
{
|
|
"epoch": 2.570903361344538,
|
|
"grad_norm": 12.206676542502768,
|
|
"learning_rate": 6.120045485114396e-07,
|
|
"loss": 0.3952086567878723,
|
|
"step": 4895
|
|
},
|
|
{
|
|
"epoch": 2.571428571428571,
|
|
"grad_norm": 18.626192015101026,
|
|
"learning_rate": 6.105403247375541e-07,
|
|
"loss": 0.3314104378223419,
|
|
"step": 4896
|
|
},
|
|
{
|
|
"epoch": 2.571953781512605,
|
|
"grad_norm": 10.46441168576784,
|
|
"learning_rate": 6.090777407397902e-07,
|
|
"loss": 0.5985084772109985,
|
|
"step": 4897
|
|
},
|
|
{
|
|
"epoch": 2.572478991596639,
|
|
"grad_norm": 7.2085395213955,
|
|
"learning_rate": 6.076167970645252e-07,
|
|
"loss": 0.3474208116531372,
|
|
"step": 4898
|
|
},
|
|
{
|
|
"epoch": 2.5730042016806722,
|
|
"grad_norm": 18.13439406031624,
|
|
"learning_rate": 6.061574942575266e-07,
|
|
"loss": 0.7391008138656616,
|
|
"step": 4899
|
|
},
|
|
{
|
|
"epoch": 2.5735294117647056,
|
|
"grad_norm": 14.427053044116693,
|
|
"learning_rate": 6.046998328639475e-07,
|
|
"loss": 0.4393512010574341,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 2.5740546218487395,
|
|
"grad_norm": 10.916262159945981,
|
|
"learning_rate": 6.032438134283286e-07,
|
|
"loss": 0.3656442165374756,
|
|
"step": 4901
|
|
},
|
|
{
|
|
"epoch": 2.5745798319327733,
|
|
"grad_norm": 12.103255393857316,
|
|
"learning_rate": 6.017894364945964e-07,
|
|
"loss": 0.6148119568824768,
|
|
"step": 4902
|
|
},
|
|
{
|
|
"epoch": 2.5751050420168067,
|
|
"grad_norm": 14.915322403540596,
|
|
"learning_rate": 6.003367026060647e-07,
|
|
"loss": 0.29871106147766113,
|
|
"step": 4903
|
|
},
|
|
{
|
|
"epoch": 2.57563025210084,
|
|
"grad_norm": 16.957751874371827,
|
|
"learning_rate": 5.988856123054337e-07,
|
|
"loss": 0.4171563386917114,
|
|
"step": 4904
|
|
},
|
|
{
|
|
"epoch": 2.576155462184874,
|
|
"grad_norm": 11.272245404964119,
|
|
"learning_rate": 5.974361661347889e-07,
|
|
"loss": 0.4126380681991577,
|
|
"step": 4905
|
|
},
|
|
{
|
|
"epoch": 2.5766806722689077,
|
|
"grad_norm": 10.180383869453076,
|
|
"learning_rate": 5.959883646356013e-07,
|
|
"loss": 0.6835031509399414,
|
|
"step": 4906
|
|
},
|
|
{
|
|
"epoch": 2.577205882352941,
|
|
"grad_norm": 9.60162119919138,
|
|
"learning_rate": 5.945422083487284e-07,
|
|
"loss": 0.3895748257637024,
|
|
"step": 4907
|
|
},
|
|
{
|
|
"epoch": 2.5777310924369745,
|
|
"grad_norm": 9.730603159694775,
|
|
"learning_rate": 5.930976978144132e-07,
|
|
"loss": 0.2707878053188324,
|
|
"step": 4908
|
|
},
|
|
{
|
|
"epoch": 2.5782563025210083,
|
|
"grad_norm": 11.542625343752965,
|
|
"learning_rate": 5.916548335722822e-07,
|
|
"loss": 0.5062350034713745,
|
|
"step": 4909
|
|
},
|
|
{
|
|
"epoch": 2.578781512605042,
|
|
"grad_norm": 7.81585480822707,
|
|
"learning_rate": 5.90213616161347e-07,
|
|
"loss": 0.632489800453186,
|
|
"step": 4910
|
|
},
|
|
{
|
|
"epoch": 2.5793067226890756,
|
|
"grad_norm": 7.853902566417403,
|
|
"learning_rate": 5.887740461200081e-07,
|
|
"loss": 0.8873011469841003,
|
|
"step": 4911
|
|
},
|
|
{
|
|
"epoch": 2.5798319327731094,
|
|
"grad_norm": 15.662605069794354,
|
|
"learning_rate": 5.873361239860464e-07,
|
|
"loss": 0.48568254709243774,
|
|
"step": 4912
|
|
},
|
|
{
|
|
"epoch": 2.580357142857143,
|
|
"grad_norm": 18.02139388229487,
|
|
"learning_rate": 5.858998502966273e-07,
|
|
"loss": 0.7427787780761719,
|
|
"step": 4913
|
|
},
|
|
{
|
|
"epoch": 2.5808823529411766,
|
|
"grad_norm": 18.37329751558733,
|
|
"learning_rate": 5.844652255883026e-07,
|
|
"loss": 0.5184937715530396,
|
|
"step": 4914
|
|
},
|
|
{
|
|
"epoch": 2.58140756302521,
|
|
"grad_norm": 12.736909638492465,
|
|
"learning_rate": 5.83032250397006e-07,
|
|
"loss": 0.7362766861915588,
|
|
"step": 4915
|
|
},
|
|
{
|
|
"epoch": 2.581932773109244,
|
|
"grad_norm": 9.187846248254253,
|
|
"learning_rate": 5.816009252580568e-07,
|
|
"loss": 0.2318895310163498,
|
|
"step": 4916
|
|
},
|
|
{
|
|
"epoch": 2.5824579831932772,
|
|
"grad_norm": 13.861756008045637,
|
|
"learning_rate": 5.801712507061563e-07,
|
|
"loss": 0.36642661690711975,
|
|
"step": 4917
|
|
},
|
|
{
|
|
"epoch": 2.582983193277311,
|
|
"grad_norm": 7.653698361429107,
|
|
"learning_rate": 5.787432272753885e-07,
|
|
"loss": 0.38152921199798584,
|
|
"step": 4918
|
|
},
|
|
{
|
|
"epoch": 2.5835084033613445,
|
|
"grad_norm": 11.877417012201363,
|
|
"learning_rate": 5.773168554992248e-07,
|
|
"loss": 0.42070138454437256,
|
|
"step": 4919
|
|
},
|
|
{
|
|
"epoch": 2.5840336134453783,
|
|
"grad_norm": 11.08707915277657,
|
|
"learning_rate": 5.758921359105158e-07,
|
|
"loss": 0.22507277131080627,
|
|
"step": 4920
|
|
},
|
|
{
|
|
"epoch": 2.5845588235294117,
|
|
"grad_norm": 14.454471628961176,
|
|
"learning_rate": 5.74469069041495e-07,
|
|
"loss": 0.23637282848358154,
|
|
"step": 4921
|
|
},
|
|
{
|
|
"epoch": 2.5850840336134455,
|
|
"grad_norm": 9.845238903312321,
|
|
"learning_rate": 5.730476554237801e-07,
|
|
"loss": 0.3867771327495575,
|
|
"step": 4922
|
|
},
|
|
{
|
|
"epoch": 2.585609243697479,
|
|
"grad_norm": 8.433582715329326,
|
|
"learning_rate": 5.716278955883703e-07,
|
|
"loss": 0.35116636753082275,
|
|
"step": 4923
|
|
},
|
|
{
|
|
"epoch": 2.5861344537815127,
|
|
"grad_norm": 10.06509714731401,
|
|
"learning_rate": 5.702097900656466e-07,
|
|
"loss": 0.2620098888874054,
|
|
"step": 4924
|
|
},
|
|
{
|
|
"epoch": 2.586659663865546,
|
|
"grad_norm": 12.524717422952994,
|
|
"learning_rate": 5.687933393853718e-07,
|
|
"loss": 0.5872154235839844,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 2.58718487394958,
|
|
"grad_norm": 10.160159474398997,
|
|
"learning_rate": 5.673785440766938e-07,
|
|
"loss": 0.38177043199539185,
|
|
"step": 4926
|
|
},
|
|
{
|
|
"epoch": 2.5877100840336134,
|
|
"grad_norm": 7.802478214541245,
|
|
"learning_rate": 5.659654046681373e-07,
|
|
"loss": 0.39889341592788696,
|
|
"step": 4927
|
|
},
|
|
{
|
|
"epoch": 2.588235294117647,
|
|
"grad_norm": 15.869754176021893,
|
|
"learning_rate": 5.645539216876117e-07,
|
|
"loss": 0.8242252469062805,
|
|
"step": 4928
|
|
},
|
|
{
|
|
"epoch": 2.5887605042016806,
|
|
"grad_norm": 9.934567067619742,
|
|
"learning_rate": 5.631440956624057e-07,
|
|
"loss": 0.2728886902332306,
|
|
"step": 4929
|
|
},
|
|
{
|
|
"epoch": 2.5892857142857144,
|
|
"grad_norm": 10.096954744165794,
|
|
"learning_rate": 5.617359271191908e-07,
|
|
"loss": 0.7107230424880981,
|
|
"step": 4930
|
|
},
|
|
{
|
|
"epoch": 2.589810924369748,
|
|
"grad_norm": 7.848418440603656,
|
|
"learning_rate": 5.603294165840173e-07,
|
|
"loss": 0.21378864347934723,
|
|
"step": 4931
|
|
},
|
|
{
|
|
"epoch": 2.5903361344537816,
|
|
"grad_norm": 15.743830660589166,
|
|
"learning_rate": 5.589245645823177e-07,
|
|
"loss": 0.7895803451538086,
|
|
"step": 4932
|
|
},
|
|
{
|
|
"epoch": 2.590861344537815,
|
|
"grad_norm": 9.572214660478107,
|
|
"learning_rate": 5.575213716389039e-07,
|
|
"loss": 0.3168739080429077,
|
|
"step": 4933
|
|
},
|
|
{
|
|
"epoch": 2.591386554621849,
|
|
"grad_norm": 14.73096843305296,
|
|
"learning_rate": 5.561198382779692e-07,
|
|
"loss": 1.4471051692962646,
|
|
"step": 4934
|
|
},
|
|
{
|
|
"epoch": 2.5919117647058822,
|
|
"grad_norm": 13.376052837695921,
|
|
"learning_rate": 5.547199650230862e-07,
|
|
"loss": 1.2652502059936523,
|
|
"step": 4935
|
|
},
|
|
{
|
|
"epoch": 2.592436974789916,
|
|
"grad_norm": 11.975741587074959,
|
|
"learning_rate": 5.533217523972073e-07,
|
|
"loss": 0.26494717597961426,
|
|
"step": 4936
|
|
},
|
|
{
|
|
"epoch": 2.5929621848739495,
|
|
"grad_norm": 9.466706069351542,
|
|
"learning_rate": 5.519252009226639e-07,
|
|
"loss": 0.532478928565979,
|
|
"step": 4937
|
|
},
|
|
{
|
|
"epoch": 2.5934873949579833,
|
|
"grad_norm": 10.098536916532042,
|
|
"learning_rate": 5.505303111211685e-07,
|
|
"loss": 0.9167251586914062,
|
|
"step": 4938
|
|
},
|
|
{
|
|
"epoch": 2.5940126050420167,
|
|
"grad_norm": 13.980146929135243,
|
|
"learning_rate": 5.491370835138116e-07,
|
|
"loss": 0.7907119989395142,
|
|
"step": 4939
|
|
},
|
|
{
|
|
"epoch": 2.5945378151260505,
|
|
"grad_norm": 8.081646284203565,
|
|
"learning_rate": 5.477455186210612e-07,
|
|
"loss": 0.5667487978935242,
|
|
"step": 4940
|
|
},
|
|
{
|
|
"epoch": 2.595063025210084,
|
|
"grad_norm": 9.20277774123809,
|
|
"learning_rate": 5.463556169627687e-07,
|
|
"loss": 0.4445914924144745,
|
|
"step": 4941
|
|
},
|
|
{
|
|
"epoch": 2.5955882352941178,
|
|
"grad_norm": 9.9060628243416,
|
|
"learning_rate": 5.449673790581611e-07,
|
|
"loss": 0.3172542452812195,
|
|
"step": 4942
|
|
},
|
|
{
|
|
"epoch": 2.596113445378151,
|
|
"grad_norm": 10.903004010929578,
|
|
"learning_rate": 5.435808054258429e-07,
|
|
"loss": 0.49419260025024414,
|
|
"step": 4943
|
|
},
|
|
{
|
|
"epoch": 2.596638655462185,
|
|
"grad_norm": 9.1126139120858,
|
|
"learning_rate": 5.421958965837993e-07,
|
|
"loss": 0.26421844959259033,
|
|
"step": 4944
|
|
},
|
|
{
|
|
"epoch": 2.5971638655462184,
|
|
"grad_norm": 22.62074914624199,
|
|
"learning_rate": 5.408126530493918e-07,
|
|
"loss": 0.923978328704834,
|
|
"step": 4945
|
|
},
|
|
{
|
|
"epoch": 2.597689075630252,
|
|
"grad_norm": 12.447667010920588,
|
|
"learning_rate": 5.394310753393606e-07,
|
|
"loss": 0.25522667169570923,
|
|
"step": 4946
|
|
},
|
|
{
|
|
"epoch": 2.5982142857142856,
|
|
"grad_norm": 8.768505787866767,
|
|
"learning_rate": 5.380511639698227e-07,
|
|
"loss": 0.2672596573829651,
|
|
"step": 4947
|
|
},
|
|
{
|
|
"epoch": 2.5987394957983194,
|
|
"grad_norm": 16.844998395265588,
|
|
"learning_rate": 5.366729194562747e-07,
|
|
"loss": 0.4241945743560791,
|
|
"step": 4948
|
|
},
|
|
{
|
|
"epoch": 2.599264705882353,
|
|
"grad_norm": 9.95635249340883,
|
|
"learning_rate": 5.352963423135893e-07,
|
|
"loss": 0.4814796447753906,
|
|
"step": 4949
|
|
},
|
|
{
|
|
"epoch": 2.5997899159663866,
|
|
"grad_norm": 8.542234566665938,
|
|
"learning_rate": 5.339214330560155e-07,
|
|
"loss": 0.3713931441307068,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 2.60031512605042,
|
|
"grad_norm": 14.735836273599324,
|
|
"learning_rate": 5.325481921971804e-07,
|
|
"loss": 0.28314444422721863,
|
|
"step": 4951
|
|
},
|
|
{
|
|
"epoch": 2.600840336134454,
|
|
"grad_norm": 11.93953246422329,
|
|
"learning_rate": 5.311766202500868e-07,
|
|
"loss": 0.5708507895469666,
|
|
"step": 4952
|
|
},
|
|
{
|
|
"epoch": 2.6013655462184873,
|
|
"grad_norm": 14.320011508280166,
|
|
"learning_rate": 5.298067177271144e-07,
|
|
"loss": 0.5337470769882202,
|
|
"step": 4953
|
|
},
|
|
{
|
|
"epoch": 2.601890756302521,
|
|
"grad_norm": 8.12294923810274,
|
|
"learning_rate": 5.284384851400204e-07,
|
|
"loss": 0.2840821146965027,
|
|
"step": 4954
|
|
},
|
|
{
|
|
"epoch": 2.6024159663865545,
|
|
"grad_norm": 14.776064194771555,
|
|
"learning_rate": 5.270719229999355e-07,
|
|
"loss": 0.3523826599121094,
|
|
"step": 4955
|
|
},
|
|
{
|
|
"epoch": 2.6029411764705883,
|
|
"grad_norm": 15.188553645729352,
|
|
"learning_rate": 5.257070318173702e-07,
|
|
"loss": 0.6922599077224731,
|
|
"step": 4956
|
|
},
|
|
{
|
|
"epoch": 2.6034663865546217,
|
|
"grad_norm": 8.866611699424793,
|
|
"learning_rate": 5.243438121022077e-07,
|
|
"loss": 0.352006733417511,
|
|
"step": 4957
|
|
},
|
|
{
|
|
"epoch": 2.6039915966386555,
|
|
"grad_norm": 15.282931728080383,
|
|
"learning_rate": 5.229822643637078e-07,
|
|
"loss": 0.6487103700637817,
|
|
"step": 4958
|
|
},
|
|
{
|
|
"epoch": 2.604516806722689,
|
|
"grad_norm": 14.542059450480222,
|
|
"learning_rate": 5.216223891105054e-07,
|
|
"loss": 1.1369600296020508,
|
|
"step": 4959
|
|
},
|
|
{
|
|
"epoch": 2.6050420168067228,
|
|
"grad_norm": 9.159200705510656,
|
|
"learning_rate": 5.202641868506115e-07,
|
|
"loss": 0.44024384021759033,
|
|
"step": 4960
|
|
},
|
|
{
|
|
"epoch": 2.605567226890756,
|
|
"grad_norm": 9.223462725611745,
|
|
"learning_rate": 5.18907658091411e-07,
|
|
"loss": 0.5041219592094421,
|
|
"step": 4961
|
|
},
|
|
{
|
|
"epoch": 2.60609243697479,
|
|
"grad_norm": 17.68379411030876,
|
|
"learning_rate": 5.175528033396632e-07,
|
|
"loss": 1.0955138206481934,
|
|
"step": 4962
|
|
},
|
|
{
|
|
"epoch": 2.6066176470588234,
|
|
"grad_norm": 7.785254889745292,
|
|
"learning_rate": 5.161996231015049e-07,
|
|
"loss": 0.40684613585472107,
|
|
"step": 4963
|
|
},
|
|
{
|
|
"epoch": 2.607142857142857,
|
|
"grad_norm": 11.738092258208198,
|
|
"learning_rate": 5.148481178824438e-07,
|
|
"loss": 0.7205455303192139,
|
|
"step": 4964
|
|
},
|
|
{
|
|
"epoch": 2.6076680672268906,
|
|
"grad_norm": 10.22256791991857,
|
|
"learning_rate": 5.134982881873646e-07,
|
|
"loss": 0.44471150636672974,
|
|
"step": 4965
|
|
},
|
|
{
|
|
"epoch": 2.6081932773109244,
|
|
"grad_norm": 10.969187643944052,
|
|
"learning_rate": 5.121501345205243e-07,
|
|
"loss": 0.6944993734359741,
|
|
"step": 4966
|
|
},
|
|
{
|
|
"epoch": 2.608718487394958,
|
|
"grad_norm": 11.13196585678083,
|
|
"learning_rate": 5.10803657385554e-07,
|
|
"loss": 0.459159791469574,
|
|
"step": 4967
|
|
},
|
|
{
|
|
"epoch": 2.6092436974789917,
|
|
"grad_norm": 11.153613586703113,
|
|
"learning_rate": 5.094588572854603e-07,
|
|
"loss": 0.3382960855960846,
|
|
"step": 4968
|
|
},
|
|
{
|
|
"epoch": 2.6097689075630255,
|
|
"grad_norm": 15.162025155832893,
|
|
"learning_rate": 5.081157347226201e-07,
|
|
"loss": 0.7505803108215332,
|
|
"step": 4969
|
|
},
|
|
{
|
|
"epoch": 2.610294117647059,
|
|
"grad_norm": 11.696195669171312,
|
|
"learning_rate": 5.067742901987855e-07,
|
|
"loss": 0.3236784338951111,
|
|
"step": 4970
|
|
},
|
|
{
|
|
"epoch": 2.6108193277310923,
|
|
"grad_norm": 12.108939877118239,
|
|
"learning_rate": 5.054345242150837e-07,
|
|
"loss": 0.6487278938293457,
|
|
"step": 4971
|
|
},
|
|
{
|
|
"epoch": 2.611344537815126,
|
|
"grad_norm": 15.672319025763857,
|
|
"learning_rate": 5.040964372720114e-07,
|
|
"loss": 0.7085149884223938,
|
|
"step": 4972
|
|
},
|
|
{
|
|
"epoch": 2.61186974789916,
|
|
"grad_norm": 9.170665570666213,
|
|
"learning_rate": 5.027600298694397e-07,
|
|
"loss": 0.4233211576938629,
|
|
"step": 4973
|
|
},
|
|
{
|
|
"epoch": 2.6123949579831933,
|
|
"grad_norm": 16.20458097285681,
|
|
"learning_rate": 5.014253025066124e-07,
|
|
"loss": 0.39938005805015564,
|
|
"step": 4974
|
|
},
|
|
{
|
|
"epoch": 2.6129201680672267,
|
|
"grad_norm": 10.635478798920339,
|
|
"learning_rate": 5.000922556821442e-07,
|
|
"loss": 0.4904247224330902,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 2.6134453781512605,
|
|
"grad_norm": 10.93500267126394,
|
|
"learning_rate": 4.987608898940244e-07,
|
|
"loss": 0.49353671073913574,
|
|
"step": 4976
|
|
},
|
|
{
|
|
"epoch": 2.6139705882352944,
|
|
"grad_norm": 11.129413067710383,
|
|
"learning_rate": 4.974312056396113e-07,
|
|
"loss": 0.3361518979072571,
|
|
"step": 4977
|
|
},
|
|
{
|
|
"epoch": 2.6144957983193278,
|
|
"grad_norm": 12.31059324856716,
|
|
"learning_rate": 4.961032034156388e-07,
|
|
"loss": 0.2596784830093384,
|
|
"step": 4978
|
|
},
|
|
{
|
|
"epoch": 2.615021008403361,
|
|
"grad_norm": 15.409159300351133,
|
|
"learning_rate": 4.947768837182093e-07,
|
|
"loss": 0.41441357135772705,
|
|
"step": 4979
|
|
},
|
|
{
|
|
"epoch": 2.615546218487395,
|
|
"grad_norm": 20.174977892561795,
|
|
"learning_rate": 4.934522470427983e-07,
|
|
"loss": 0.9614930748939514,
|
|
"step": 4980
|
|
},
|
|
{
|
|
"epoch": 2.616071428571429,
|
|
"grad_norm": 6.601700605135325,
|
|
"learning_rate": 4.92129293884252e-07,
|
|
"loss": 0.26034635305404663,
|
|
"step": 4981
|
|
},
|
|
{
|
|
"epoch": 2.616596638655462,
|
|
"grad_norm": 15.349848786614412,
|
|
"learning_rate": 4.908080247367869e-07,
|
|
"loss": 0.560057520866394,
|
|
"step": 4982
|
|
},
|
|
{
|
|
"epoch": 2.6171218487394956,
|
|
"grad_norm": 9.326176327713412,
|
|
"learning_rate": 4.89488440093992e-07,
|
|
"loss": 0.6545529961585999,
|
|
"step": 4983
|
|
},
|
|
{
|
|
"epoch": 2.6176470588235294,
|
|
"grad_norm": 9.633776927291555,
|
|
"learning_rate": 4.881705404488252e-07,
|
|
"loss": 0.5484632253646851,
|
|
"step": 4984
|
|
},
|
|
{
|
|
"epoch": 2.6181722689075633,
|
|
"grad_norm": 17.935161540688796,
|
|
"learning_rate": 4.868543262936176e-07,
|
|
"loss": 1.4751113653182983,
|
|
"step": 4985
|
|
},
|
|
{
|
|
"epoch": 2.6186974789915967,
|
|
"grad_norm": 12.789476810153891,
|
|
"learning_rate": 4.855397981200682e-07,
|
|
"loss": 2.3696200847625732,
|
|
"step": 4986
|
|
},
|
|
{
|
|
"epoch": 2.61922268907563,
|
|
"grad_norm": 10.784548811215974,
|
|
"learning_rate": 4.842269564192475e-07,
|
|
"loss": 0.29669246077537537,
|
|
"step": 4987
|
|
},
|
|
{
|
|
"epoch": 2.619747899159664,
|
|
"grad_norm": 11.192374082774652,
|
|
"learning_rate": 4.829158016815944e-07,
|
|
"loss": 0.5222846269607544,
|
|
"step": 4988
|
|
},
|
|
{
|
|
"epoch": 2.6202731092436977,
|
|
"grad_norm": 14.040186671829293,
|
|
"learning_rate": 4.816063343969196e-07,
|
|
"loss": 0.7106570601463318,
|
|
"step": 4989
|
|
},
|
|
{
|
|
"epoch": 2.620798319327731,
|
|
"grad_norm": 8.559378630416449,
|
|
"learning_rate": 4.802985550544015e-07,
|
|
"loss": 0.3928842842578888,
|
|
"step": 4990
|
|
},
|
|
{
|
|
"epoch": 2.6213235294117645,
|
|
"grad_norm": 10.384040243522856,
|
|
"learning_rate": 4.789924641425892e-07,
|
|
"loss": 0.2502846121788025,
|
|
"step": 4991
|
|
},
|
|
{
|
|
"epoch": 2.6218487394957983,
|
|
"grad_norm": 12.61645409509892,
|
|
"learning_rate": 4.776880621494018e-07,
|
|
"loss": 0.4573371410369873,
|
|
"step": 4992
|
|
},
|
|
{
|
|
"epoch": 2.622373949579832,
|
|
"grad_norm": 9.581021902806178,
|
|
"learning_rate": 4.763853495621251e-07,
|
|
"loss": 0.3357299566268921,
|
|
"step": 4993
|
|
},
|
|
{
|
|
"epoch": 2.6228991596638656,
|
|
"grad_norm": 13.669627820315572,
|
|
"learning_rate": 4.7508432686741654e-07,
|
|
"loss": 0.19969376921653748,
|
|
"step": 4994
|
|
},
|
|
{
|
|
"epoch": 2.623424369747899,
|
|
"grad_norm": 10.384947809777664,
|
|
"learning_rate": 4.7378499455129955e-07,
|
|
"loss": 0.45344555377960205,
|
|
"step": 4995
|
|
},
|
|
{
|
|
"epoch": 2.6239495798319328,
|
|
"grad_norm": 9.542233395549253,
|
|
"learning_rate": 4.724873530991686e-07,
|
|
"loss": 0.46982109546661377,
|
|
"step": 4996
|
|
},
|
|
{
|
|
"epoch": 2.6244747899159666,
|
|
"grad_norm": 6.954420666419455,
|
|
"learning_rate": 4.7119140299578424e-07,
|
|
"loss": 0.4305351972579956,
|
|
"step": 4997
|
|
},
|
|
{
|
|
"epoch": 2.625,
|
|
"grad_norm": 8.80165104637619,
|
|
"learning_rate": 4.6989714472527705e-07,
|
|
"loss": 0.3331296443939209,
|
|
"step": 4998
|
|
},
|
|
{
|
|
"epoch": 2.6255252100840334,
|
|
"grad_norm": 329.8798479078397,
|
|
"learning_rate": 4.686045787711435e-07,
|
|
"loss": 2.238851308822632,
|
|
"step": 4999
|
|
},
|
|
{
|
|
"epoch": 2.6260504201680672,
|
|
"grad_norm": 11.880667119518066,
|
|
"learning_rate": 4.673137056162508e-07,
|
|
"loss": 0.5143446922302246,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 2.626575630252101,
|
|
"grad_norm": 15.530134939738671,
|
|
"learning_rate": 4.660245257428297e-07,
|
|
"loss": 0.47613704204559326,
|
|
"step": 5001
|
|
},
|
|
{
|
|
"epoch": 2.6271008403361344,
|
|
"grad_norm": 10.199669714538224,
|
|
"learning_rate": 4.647370396324835e-07,
|
|
"loss": 0.5171878933906555,
|
|
"step": 5002
|
|
},
|
|
{
|
|
"epoch": 2.627626050420168,
|
|
"grad_norm": 10.302973272122806,
|
|
"learning_rate": 4.6345124776617847e-07,
|
|
"loss": 0.39039042592048645,
|
|
"step": 5003
|
|
},
|
|
{
|
|
"epoch": 2.6281512605042017,
|
|
"grad_norm": 7.315377061463184,
|
|
"learning_rate": 4.621671506242503e-07,
|
|
"loss": 0.5917413830757141,
|
|
"step": 5004
|
|
},
|
|
{
|
|
"epoch": 2.6286764705882355,
|
|
"grad_norm": 11.848402962179168,
|
|
"learning_rate": 4.6088474868640045e-07,
|
|
"loss": 0.26132234930992126,
|
|
"step": 5005
|
|
},
|
|
{
|
|
"epoch": 2.629201680672269,
|
|
"grad_norm": 14.072034274867068,
|
|
"learning_rate": 4.596040424316972e-07,
|
|
"loss": 0.8253778219223022,
|
|
"step": 5006
|
|
},
|
|
{
|
|
"epoch": 2.6297268907563023,
|
|
"grad_norm": 16.722932872957276,
|
|
"learning_rate": 4.583250323385763e-07,
|
|
"loss": 0.28480014204978943,
|
|
"step": 5007
|
|
},
|
|
{
|
|
"epoch": 2.630252100840336,
|
|
"grad_norm": 8.687887171112392,
|
|
"learning_rate": 4.570477188848377e-07,
|
|
"loss": 0.21405500173568726,
|
|
"step": 5008
|
|
},
|
|
{
|
|
"epoch": 2.63077731092437,
|
|
"grad_norm": 9.063060080742835,
|
|
"learning_rate": 4.557721025476508e-07,
|
|
"loss": 0.8213982582092285,
|
|
"step": 5009
|
|
},
|
|
{
|
|
"epoch": 2.6313025210084033,
|
|
"grad_norm": 10.684155451899711,
|
|
"learning_rate": 4.5449818380354895e-07,
|
|
"loss": 0.5271738767623901,
|
|
"step": 5010
|
|
},
|
|
{
|
|
"epoch": 2.6318277310924367,
|
|
"grad_norm": 13.21988084784808,
|
|
"learning_rate": 4.532259631284308e-07,
|
|
"loss": 0.7251818180084229,
|
|
"step": 5011
|
|
},
|
|
{
|
|
"epoch": 2.6323529411764706,
|
|
"grad_norm": 9.213945777737939,
|
|
"learning_rate": 4.519554409975624e-07,
|
|
"loss": 0.7404474020004272,
|
|
"step": 5012
|
|
},
|
|
{
|
|
"epoch": 2.6328781512605044,
|
|
"grad_norm": 10.19667787197892,
|
|
"learning_rate": 4.5068661788557345e-07,
|
|
"loss": 0.26499176025390625,
|
|
"step": 5013
|
|
},
|
|
{
|
|
"epoch": 2.633403361344538,
|
|
"grad_norm": 12.277780908040612,
|
|
"learning_rate": 4.4941949426646034e-07,
|
|
"loss": 0.5239801406860352,
|
|
"step": 5014
|
|
},
|
|
{
|
|
"epoch": 2.633928571428571,
|
|
"grad_norm": 17.096414490131995,
|
|
"learning_rate": 4.481540706135845e-07,
|
|
"loss": 0.36629125475883484,
|
|
"step": 5015
|
|
},
|
|
{
|
|
"epoch": 2.634453781512605,
|
|
"grad_norm": 12.241158966036348,
|
|
"learning_rate": 4.4689034739966994e-07,
|
|
"loss": 0.8801144361495972,
|
|
"step": 5016
|
|
},
|
|
{
|
|
"epoch": 2.634978991596639,
|
|
"grad_norm": 15.69368639398231,
|
|
"learning_rate": 4.4562832509680963e-07,
|
|
"loss": 0.5607030987739563,
|
|
"step": 5017
|
|
},
|
|
{
|
|
"epoch": 2.6355042016806722,
|
|
"grad_norm": 15.44521256084083,
|
|
"learning_rate": 4.4436800417645863e-07,
|
|
"loss": 0.48291152715682983,
|
|
"step": 5018
|
|
},
|
|
{
|
|
"epoch": 2.6360294117647056,
|
|
"grad_norm": 13.63791560133695,
|
|
"learning_rate": 4.4310938510943533e-07,
|
|
"loss": 0.5484304428100586,
|
|
"step": 5019
|
|
},
|
|
{
|
|
"epoch": 2.6365546218487395,
|
|
"grad_norm": 6.534181883845186,
|
|
"learning_rate": 4.4185246836592475e-07,
|
|
"loss": 0.2272014170885086,
|
|
"step": 5020
|
|
},
|
|
{
|
|
"epoch": 2.6370798319327733,
|
|
"grad_norm": 8.219637050497115,
|
|
"learning_rate": 4.4059725441547464e-07,
|
|
"loss": 0.4130197763442993,
|
|
"step": 5021
|
|
},
|
|
{
|
|
"epoch": 2.6376050420168067,
|
|
"grad_norm": 9.126100017102663,
|
|
"learning_rate": 4.3934374372699704e-07,
|
|
"loss": 0.44093483686447144,
|
|
"step": 5022
|
|
},
|
|
{
|
|
"epoch": 2.63813025210084,
|
|
"grad_norm": 6.227653400264672,
|
|
"learning_rate": 4.3809193676876584e-07,
|
|
"loss": 0.2749183773994446,
|
|
"step": 5023
|
|
},
|
|
{
|
|
"epoch": 2.638655462184874,
|
|
"grad_norm": 15.142643835997328,
|
|
"learning_rate": 4.3684183400842297e-07,
|
|
"loss": 0.4120340943336487,
|
|
"step": 5024
|
|
},
|
|
{
|
|
"epoch": 2.6391806722689077,
|
|
"grad_norm": 14.07794606682438,
|
|
"learning_rate": 4.355934359129699e-07,
|
|
"loss": 0.6837051510810852,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 2.639705882352941,
|
|
"grad_norm": 10.618887524116545,
|
|
"learning_rate": 4.343467429487719e-07,
|
|
"loss": 0.9854604601860046,
|
|
"step": 5026
|
|
},
|
|
{
|
|
"epoch": 2.6402310924369745,
|
|
"grad_norm": 9.556604938209075,
|
|
"learning_rate": 4.331017555815575e-07,
|
|
"loss": 0.3573746383190155,
|
|
"step": 5027
|
|
},
|
|
{
|
|
"epoch": 2.6407563025210083,
|
|
"grad_norm": 10.14366608903893,
|
|
"learning_rate": 4.318584742764187e-07,
|
|
"loss": 0.39185842871665955,
|
|
"step": 5028
|
|
},
|
|
{
|
|
"epoch": 2.641281512605042,
|
|
"grad_norm": 9.47396003753919,
|
|
"learning_rate": 4.3061689949780995e-07,
|
|
"loss": 0.4061623513698578,
|
|
"step": 5029
|
|
},
|
|
{
|
|
"epoch": 2.6418067226890756,
|
|
"grad_norm": 9.172234088657502,
|
|
"learning_rate": 4.2937703170954635e-07,
|
|
"loss": 0.715480625629425,
|
|
"step": 5030
|
|
},
|
|
{
|
|
"epoch": 2.6423319327731094,
|
|
"grad_norm": 10.649151903912319,
|
|
"learning_rate": 4.281388713748091e-07,
|
|
"loss": 0.7693527340888977,
|
|
"step": 5031
|
|
},
|
|
{
|
|
"epoch": 2.642857142857143,
|
|
"grad_norm": 10.74590409758232,
|
|
"learning_rate": 4.269024189561383e-07,
|
|
"loss": 0.6536192893981934,
|
|
"step": 5032
|
|
},
|
|
{
|
|
"epoch": 2.6433823529411766,
|
|
"grad_norm": 8.0265756243527,
|
|
"learning_rate": 4.2566767491543706e-07,
|
|
"loss": 0.1733337789773941,
|
|
"step": 5033
|
|
},
|
|
{
|
|
"epoch": 2.64390756302521,
|
|
"grad_norm": 13.74760894584026,
|
|
"learning_rate": 4.2443463971397094e-07,
|
|
"loss": 0.46782687306404114,
|
|
"step": 5034
|
|
},
|
|
{
|
|
"epoch": 2.644432773109244,
|
|
"grad_norm": 9.248094269141976,
|
|
"learning_rate": 4.2320331381236535e-07,
|
|
"loss": 0.5997669696807861,
|
|
"step": 5035
|
|
},
|
|
{
|
|
"epoch": 2.6449579831932772,
|
|
"grad_norm": 12.099957296138106,
|
|
"learning_rate": 4.2197369767060904e-07,
|
|
"loss": 0.5305054187774658,
|
|
"step": 5036
|
|
},
|
|
{
|
|
"epoch": 2.645483193277311,
|
|
"grad_norm": 7.944928665825679,
|
|
"learning_rate": 4.2074579174805173e-07,
|
|
"loss": 0.4841510057449341,
|
|
"step": 5037
|
|
},
|
|
{
|
|
"epoch": 2.6460084033613445,
|
|
"grad_norm": 9.634128082702697,
|
|
"learning_rate": 4.195195965034016e-07,
|
|
"loss": 0.44690483808517456,
|
|
"step": 5038
|
|
},
|
|
{
|
|
"epoch": 2.6465336134453783,
|
|
"grad_norm": 10.374367688327673,
|
|
"learning_rate": 4.182951123947332e-07,
|
|
"loss": 0.6667125821113586,
|
|
"step": 5039
|
|
},
|
|
{
|
|
"epoch": 2.6470588235294117,
|
|
"grad_norm": 10.663043440372022,
|
|
"learning_rate": 4.1707233987947683e-07,
|
|
"loss": 0.7713562846183777,
|
|
"step": 5040
|
|
},
|
|
{
|
|
"epoch": 2.6475840336134455,
|
|
"grad_norm": 5.947725818998819,
|
|
"learning_rate": 4.1585127941442536e-07,
|
|
"loss": 0.14723870158195496,
|
|
"step": 5041
|
|
},
|
|
{
|
|
"epoch": 2.648109243697479,
|
|
"grad_norm": 9.584843587310504,
|
|
"learning_rate": 4.1463193145573175e-07,
|
|
"loss": 0.292923241853714,
|
|
"step": 5042
|
|
},
|
|
{
|
|
"epoch": 2.6486344537815127,
|
|
"grad_norm": 8.914960563211729,
|
|
"learning_rate": 4.134142964589105e-07,
|
|
"loss": 0.5311110019683838,
|
|
"step": 5043
|
|
},
|
|
{
|
|
"epoch": 2.649159663865546,
|
|
"grad_norm": 16.191387434150904,
|
|
"learning_rate": 4.121983748788333e-07,
|
|
"loss": 0.6045875549316406,
|
|
"step": 5044
|
|
},
|
|
{
|
|
"epoch": 2.64968487394958,
|
|
"grad_norm": 10.7545985171071,
|
|
"learning_rate": 4.1098416716973457e-07,
|
|
"loss": 0.7185678482055664,
|
|
"step": 5045
|
|
},
|
|
{
|
|
"epoch": 2.6502100840336134,
|
|
"grad_norm": 10.785013009842046,
|
|
"learning_rate": 4.0977167378520757e-07,
|
|
"loss": 0.3030956983566284,
|
|
"step": 5046
|
|
},
|
|
{
|
|
"epoch": 2.650735294117647,
|
|
"grad_norm": 9.166300126067835,
|
|
"learning_rate": 4.0856089517820495e-07,
|
|
"loss": 0.2926194667816162,
|
|
"step": 5047
|
|
},
|
|
{
|
|
"epoch": 2.6512605042016806,
|
|
"grad_norm": 7.437105049032209,
|
|
"learning_rate": 4.073518318010389e-07,
|
|
"loss": 0.5140249729156494,
|
|
"step": 5048
|
|
},
|
|
{
|
|
"epoch": 2.6517857142857144,
|
|
"grad_norm": 10.374503355191733,
|
|
"learning_rate": 4.0614448410538077e-07,
|
|
"loss": 0.43929004669189453,
|
|
"step": 5049
|
|
},
|
|
{
|
|
"epoch": 2.652310924369748,
|
|
"grad_norm": 11.917484956714038,
|
|
"learning_rate": 4.0493885254226037e-07,
|
|
"loss": 0.2109602987766266,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 2.6528361344537816,
|
|
"grad_norm": 11.482493297152423,
|
|
"learning_rate": 4.037349375620675e-07,
|
|
"loss": 0.7243170738220215,
|
|
"step": 5051
|
|
},
|
|
{
|
|
"epoch": 2.653361344537815,
|
|
"grad_norm": 14.272292392510508,
|
|
"learning_rate": 4.0253273961455065e-07,
|
|
"loss": 0.3412622809410095,
|
|
"step": 5052
|
|
},
|
|
{
|
|
"epoch": 2.653886554621849,
|
|
"grad_norm": 9.552330371113477,
|
|
"learning_rate": 4.01332259148815e-07,
|
|
"loss": 0.6411492228507996,
|
|
"step": 5053
|
|
},
|
|
{
|
|
"epoch": 2.6544117647058822,
|
|
"grad_norm": 8.022015111837447,
|
|
"learning_rate": 4.0013349661332745e-07,
|
|
"loss": 0.3606320321559906,
|
|
"step": 5054
|
|
},
|
|
{
|
|
"epoch": 2.654936974789916,
|
|
"grad_norm": 19.069147101970064,
|
|
"learning_rate": 3.989364524559108e-07,
|
|
"loss": 0.7307056188583374,
|
|
"step": 5055
|
|
},
|
|
{
|
|
"epoch": 2.6554621848739495,
|
|
"grad_norm": 21.058146930449862,
|
|
"learning_rate": 3.977411271237458e-07,
|
|
"loss": 0.5460872054100037,
|
|
"step": 5056
|
|
},
|
|
{
|
|
"epoch": 2.6559873949579833,
|
|
"grad_norm": 8.522836469199902,
|
|
"learning_rate": 3.965475210633718e-07,
|
|
"loss": 0.38311946392059326,
|
|
"step": 5057
|
|
},
|
|
{
|
|
"epoch": 2.6565126050420167,
|
|
"grad_norm": 9.746552559779182,
|
|
"learning_rate": 3.953556347206861e-07,
|
|
"loss": 0.49410921335220337,
|
|
"step": 5058
|
|
},
|
|
{
|
|
"epoch": 2.6570378151260505,
|
|
"grad_norm": 14.03211872512491,
|
|
"learning_rate": 3.9416546854094294e-07,
|
|
"loss": 0.6534441113471985,
|
|
"step": 5059
|
|
},
|
|
{
|
|
"epoch": 2.657563025210084,
|
|
"grad_norm": 11.261552326662507,
|
|
"learning_rate": 3.9297702296875297e-07,
|
|
"loss": 0.8130030035972595,
|
|
"step": 5060
|
|
},
|
|
{
|
|
"epoch": 2.6580882352941178,
|
|
"grad_norm": 9.925699429537678,
|
|
"learning_rate": 3.917902984480881e-07,
|
|
"loss": 0.3044094443321228,
|
|
"step": 5061
|
|
},
|
|
{
|
|
"epoch": 2.658613445378151,
|
|
"grad_norm": 8.940964821056612,
|
|
"learning_rate": 3.9060529542227277e-07,
|
|
"loss": 0.18114949762821198,
|
|
"step": 5062
|
|
},
|
|
{
|
|
"epoch": 2.659138655462185,
|
|
"grad_norm": 7.752457253335019,
|
|
"learning_rate": 3.894220143339905e-07,
|
|
"loss": 0.1693650186061859,
|
|
"step": 5063
|
|
},
|
|
{
|
|
"epoch": 2.6596638655462184,
|
|
"grad_norm": 17.460865938865027,
|
|
"learning_rate": 3.882404556252811e-07,
|
|
"loss": 0.5181553959846497,
|
|
"step": 5064
|
|
},
|
|
{
|
|
"epoch": 2.660189075630252,
|
|
"grad_norm": 10.440624579430413,
|
|
"learning_rate": 3.870606197375415e-07,
|
|
"loss": 0.3412543833255768,
|
|
"step": 5065
|
|
},
|
|
{
|
|
"epoch": 2.6607142857142856,
|
|
"grad_norm": 13.194396878437251,
|
|
"learning_rate": 3.8588250711152295e-07,
|
|
"loss": 0.4590100049972534,
|
|
"step": 5066
|
|
},
|
|
{
|
|
"epoch": 2.6612394957983194,
|
|
"grad_norm": 14.42595042453583,
|
|
"learning_rate": 3.8470611818733516e-07,
|
|
"loss": 0.36411648988723755,
|
|
"step": 5067
|
|
},
|
|
{
|
|
"epoch": 2.661764705882353,
|
|
"grad_norm": 12.726359330704001,
|
|
"learning_rate": 3.8353145340444486e-07,
|
|
"loss": 0.4519106149673462,
|
|
"step": 5068
|
|
},
|
|
{
|
|
"epoch": 2.6622899159663866,
|
|
"grad_norm": 13.62414776448119,
|
|
"learning_rate": 3.823585132016711e-07,
|
|
"loss": 0.39295411109924316,
|
|
"step": 5069
|
|
},
|
|
{
|
|
"epoch": 2.66281512605042,
|
|
"grad_norm": 11.306907392656061,
|
|
"learning_rate": 3.8118729801719157e-07,
|
|
"loss": 0.6925134658813477,
|
|
"step": 5070
|
|
},
|
|
{
|
|
"epoch": 2.663340336134454,
|
|
"grad_norm": 9.726202812301112,
|
|
"learning_rate": 3.800178082885386e-07,
|
|
"loss": 0.4672687351703644,
|
|
"step": 5071
|
|
},
|
|
{
|
|
"epoch": 2.6638655462184873,
|
|
"grad_norm": 10.549496219967628,
|
|
"learning_rate": 3.7885004445259997e-07,
|
|
"loss": 0.7954195141792297,
|
|
"step": 5072
|
|
},
|
|
{
|
|
"epoch": 2.664390756302521,
|
|
"grad_norm": 18.812156421013242,
|
|
"learning_rate": 3.776840069456189e-07,
|
|
"loss": 0.791772723197937,
|
|
"step": 5073
|
|
},
|
|
{
|
|
"epoch": 2.6649159663865545,
|
|
"grad_norm": 15.442799520498514,
|
|
"learning_rate": 3.765196962031925e-07,
|
|
"loss": 1.1211323738098145,
|
|
"step": 5074
|
|
},
|
|
{
|
|
"epoch": 2.6654411764705883,
|
|
"grad_norm": 9.302269880984653,
|
|
"learning_rate": 3.753571126602745e-07,
|
|
"loss": 0.29546308517456055,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 2.6659663865546217,
|
|
"grad_norm": 13.886958122168721,
|
|
"learning_rate": 3.741962567511731e-07,
|
|
"loss": 0.4888610541820526,
|
|
"step": 5076
|
|
},
|
|
{
|
|
"epoch": 2.6664915966386555,
|
|
"grad_norm": 20.780045152002426,
|
|
"learning_rate": 3.730371289095508e-07,
|
|
"loss": 0.910915732383728,
|
|
"step": 5077
|
|
},
|
|
{
|
|
"epoch": 2.667016806722689,
|
|
"grad_norm": 5.838502590757335,
|
|
"learning_rate": 3.71879729568424e-07,
|
|
"loss": 0.3355761170387268,
|
|
"step": 5078
|
|
},
|
|
{
|
|
"epoch": 2.6675420168067228,
|
|
"grad_norm": 13.460261740815776,
|
|
"learning_rate": 3.7072405916016353e-07,
|
|
"loss": 0.3116706311702728,
|
|
"step": 5079
|
|
},
|
|
{
|
|
"epoch": 2.668067226890756,
|
|
"grad_norm": 9.178354197184836,
|
|
"learning_rate": 3.6957011811649567e-07,
|
|
"loss": 0.5316522121429443,
|
|
"step": 5080
|
|
},
|
|
{
|
|
"epoch": 2.66859243697479,
|
|
"grad_norm": 17.862679280425013,
|
|
"learning_rate": 3.6841790686849897e-07,
|
|
"loss": 1.001525640487671,
|
|
"step": 5081
|
|
},
|
|
{
|
|
"epoch": 2.6691176470588234,
|
|
"grad_norm": 10.73360002274136,
|
|
"learning_rate": 3.6726742584660637e-07,
|
|
"loss": 0.7820755243301392,
|
|
"step": 5082
|
|
},
|
|
{
|
|
"epoch": 2.669642857142857,
|
|
"grad_norm": 19.69197224149106,
|
|
"learning_rate": 3.661186754806051e-07,
|
|
"loss": 0.9408999681472778,
|
|
"step": 5083
|
|
},
|
|
{
|
|
"epoch": 2.6701680672268906,
|
|
"grad_norm": 8.305112385944424,
|
|
"learning_rate": 3.649716561996353e-07,
|
|
"loss": 0.5280729532241821,
|
|
"step": 5084
|
|
},
|
|
{
|
|
"epoch": 2.6706932773109244,
|
|
"grad_norm": 7.363834394608336,
|
|
"learning_rate": 3.6382636843218967e-07,
|
|
"loss": 0.25487959384918213,
|
|
"step": 5085
|
|
},
|
|
{
|
|
"epoch": 2.671218487394958,
|
|
"grad_norm": 15.765167085439563,
|
|
"learning_rate": 3.62682812606116e-07,
|
|
"loss": 0.8331020474433899,
|
|
"step": 5086
|
|
},
|
|
{
|
|
"epoch": 2.6717436974789917,
|
|
"grad_norm": 14.727366211498815,
|
|
"learning_rate": 3.615409891486127e-07,
|
|
"loss": 0.9125362634658813,
|
|
"step": 5087
|
|
},
|
|
{
|
|
"epoch": 2.6722689075630255,
|
|
"grad_norm": 15.26226066798175,
|
|
"learning_rate": 3.604008984862334e-07,
|
|
"loss": 0.7915086150169373,
|
|
"step": 5088
|
|
},
|
|
{
|
|
"epoch": 2.672794117647059,
|
|
"grad_norm": 6.484660751647232,
|
|
"learning_rate": 3.592625410448813e-07,
|
|
"loss": 0.11719128489494324,
|
|
"step": 5089
|
|
},
|
|
{
|
|
"epoch": 2.6733193277310923,
|
|
"grad_norm": 8.572139401460749,
|
|
"learning_rate": 3.581259172498169e-07,
|
|
"loss": 0.5589736104011536,
|
|
"step": 5090
|
|
},
|
|
{
|
|
"epoch": 2.673844537815126,
|
|
"grad_norm": 12.452725733062692,
|
|
"learning_rate": 3.569910275256483e-07,
|
|
"loss": 0.29968681931495667,
|
|
"step": 5091
|
|
},
|
|
{
|
|
"epoch": 2.67436974789916,
|
|
"grad_norm": 12.172960299583249,
|
|
"learning_rate": 3.5585787229633794e-07,
|
|
"loss": 0.48796796798706055,
|
|
"step": 5092
|
|
},
|
|
{
|
|
"epoch": 2.6748949579831933,
|
|
"grad_norm": 14.3603765992435,
|
|
"learning_rate": 3.5472645198520064e-07,
|
|
"loss": 0.9962418675422668,
|
|
"step": 5093
|
|
},
|
|
{
|
|
"epoch": 2.6754201680672267,
|
|
"grad_norm": 13.686422078868164,
|
|
"learning_rate": 3.535967670149021e-07,
|
|
"loss": 0.49266868829727173,
|
|
"step": 5094
|
|
},
|
|
{
|
|
"epoch": 2.6759453781512605,
|
|
"grad_norm": 9.403051743595773,
|
|
"learning_rate": 3.5246881780746034e-07,
|
|
"loss": 0.3322441577911377,
|
|
"step": 5095
|
|
},
|
|
{
|
|
"epoch": 2.6764705882352944,
|
|
"grad_norm": 9.016010865890543,
|
|
"learning_rate": 3.51342604784245e-07,
|
|
"loss": 1.0054486989974976,
|
|
"step": 5096
|
|
},
|
|
{
|
|
"epoch": 2.6769957983193278,
|
|
"grad_norm": 7.950977095117265,
|
|
"learning_rate": 3.502181283659756e-07,
|
|
"loss": 0.5073027610778809,
|
|
"step": 5097
|
|
},
|
|
{
|
|
"epoch": 2.677521008403361,
|
|
"grad_norm": 12.763806412288341,
|
|
"learning_rate": 3.490953889727261e-07,
|
|
"loss": 0.40890246629714966,
|
|
"step": 5098
|
|
},
|
|
{
|
|
"epoch": 2.678046218487395,
|
|
"grad_norm": 9.699736906986432,
|
|
"learning_rate": 3.479743870239188e-07,
|
|
"loss": 0.490925133228302,
|
|
"step": 5099
|
|
},
|
|
{
|
|
"epoch": 2.678571428571429,
|
|
"grad_norm": 12.970574000287224,
|
|
"learning_rate": 3.4685512293832823e-07,
|
|
"loss": 0.38826829195022583,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 2.679096638655462,
|
|
"grad_norm": 11.456556383710724,
|
|
"learning_rate": 3.4573759713407927e-07,
|
|
"loss": 0.34248778223991394,
|
|
"step": 5101
|
|
},
|
|
{
|
|
"epoch": 2.6796218487394956,
|
|
"grad_norm": 12.123584688114516,
|
|
"learning_rate": 3.4462181002864694e-07,
|
|
"loss": 0.751177966594696,
|
|
"step": 5102
|
|
},
|
|
{
|
|
"epoch": 2.6801470588235294,
|
|
"grad_norm": 7.9298450949287265,
|
|
"learning_rate": 3.435077620388577e-07,
|
|
"loss": 0.2769845724105835,
|
|
"step": 5103
|
|
},
|
|
{
|
|
"epoch": 2.6806722689075633,
|
|
"grad_norm": 12.652891528358486,
|
|
"learning_rate": 3.423954535808871e-07,
|
|
"loss": 0.32310184836387634,
|
|
"step": 5104
|
|
},
|
|
{
|
|
"epoch": 2.6811974789915967,
|
|
"grad_norm": 7.673829371355647,
|
|
"learning_rate": 3.4128488507026327e-07,
|
|
"loss": 0.36670172214508057,
|
|
"step": 5105
|
|
},
|
|
{
|
|
"epoch": 2.68172268907563,
|
|
"grad_norm": 9.227254053382925,
|
|
"learning_rate": 3.4017605692186207e-07,
|
|
"loss": 0.5683304667472839,
|
|
"step": 5106
|
|
},
|
|
{
|
|
"epoch": 2.682247899159664,
|
|
"grad_norm": 10.441834545797024,
|
|
"learning_rate": 3.390689695499089e-07,
|
|
"loss": 0.46113595366477966,
|
|
"step": 5107
|
|
},
|
|
{
|
|
"epoch": 2.6827731092436977,
|
|
"grad_norm": 6.066434769965064,
|
|
"learning_rate": 3.379636233679812e-07,
|
|
"loss": 0.10238748788833618,
|
|
"step": 5108
|
|
},
|
|
{
|
|
"epoch": 2.683298319327731,
|
|
"grad_norm": 11.27713465418464,
|
|
"learning_rate": 3.3686001878900365e-07,
|
|
"loss": 0.33836841583251953,
|
|
"step": 5109
|
|
},
|
|
{
|
|
"epoch": 2.6838235294117645,
|
|
"grad_norm": 11.489752386889222,
|
|
"learning_rate": 3.3575815622525096e-07,
|
|
"loss": 0.7936794757843018,
|
|
"step": 5110
|
|
},
|
|
{
|
|
"epoch": 2.6843487394957983,
|
|
"grad_norm": 7.366511831349445,
|
|
"learning_rate": 3.3465803608834837e-07,
|
|
"loss": 0.120862677693367,
|
|
"step": 5111
|
|
},
|
|
{
|
|
"epoch": 2.684873949579832,
|
|
"grad_norm": 9.496802476563838,
|
|
"learning_rate": 3.335596587892681e-07,
|
|
"loss": 0.5731973648071289,
|
|
"step": 5112
|
|
},
|
|
{
|
|
"epoch": 2.6853991596638656,
|
|
"grad_norm": 12.501951837295532,
|
|
"learning_rate": 3.324630247383337e-07,
|
|
"loss": 0.7040807008743286,
|
|
"step": 5113
|
|
},
|
|
{
|
|
"epoch": 2.685924369747899,
|
|
"grad_norm": 11.644252178637693,
|
|
"learning_rate": 3.313681343452152e-07,
|
|
"loss": 1.183510661125183,
|
|
"step": 5114
|
|
},
|
|
{
|
|
"epoch": 2.6864495798319328,
|
|
"grad_norm": 13.946909485798127,
|
|
"learning_rate": 3.3027498801893266e-07,
|
|
"loss": 0.7500033974647522,
|
|
"step": 5115
|
|
},
|
|
{
|
|
"epoch": 2.6869747899159666,
|
|
"grad_norm": 9.296163618879163,
|
|
"learning_rate": 3.2918358616785384e-07,
|
|
"loss": 0.3383568525314331,
|
|
"step": 5116
|
|
},
|
|
{
|
|
"epoch": 2.6875,
|
|
"grad_norm": 10.415550548151717,
|
|
"learning_rate": 3.2809392919969483e-07,
|
|
"loss": 0.7802726030349731,
|
|
"step": 5117
|
|
},
|
|
{
|
|
"epoch": 2.6880252100840334,
|
|
"grad_norm": 14.71984912515302,
|
|
"learning_rate": 3.2700601752152117e-07,
|
|
"loss": 0.5044372081756592,
|
|
"step": 5118
|
|
},
|
|
{
|
|
"epoch": 2.6885504201680672,
|
|
"grad_norm": 9.397877479800641,
|
|
"learning_rate": 3.2591985153974383e-07,
|
|
"loss": 0.789323091506958,
|
|
"step": 5119
|
|
},
|
|
{
|
|
"epoch": 2.689075630252101,
|
|
"grad_norm": 12.374559013291245,
|
|
"learning_rate": 3.248354316601254e-07,
|
|
"loss": 0.3527710735797882,
|
|
"step": 5120
|
|
},
|
|
{
|
|
"epoch": 2.6896008403361344,
|
|
"grad_norm": 11.935226966945294,
|
|
"learning_rate": 3.2375275828777253e-07,
|
|
"loss": 0.5447399020195007,
|
|
"step": 5121
|
|
},
|
|
{
|
|
"epoch": 2.690126050420168,
|
|
"grad_norm": 15.418321811650998,
|
|
"learning_rate": 3.226718318271416e-07,
|
|
"loss": 0.5229968428611755,
|
|
"step": 5122
|
|
},
|
|
{
|
|
"epoch": 2.6906512605042017,
|
|
"grad_norm": 11.611984987052244,
|
|
"learning_rate": 3.215926526820351e-07,
|
|
"loss": 0.27938351035118103,
|
|
"step": 5123
|
|
},
|
|
{
|
|
"epoch": 2.6911764705882355,
|
|
"grad_norm": 10.234290346976508,
|
|
"learning_rate": 3.20515221255604e-07,
|
|
"loss": 0.45139533281326294,
|
|
"step": 5124
|
|
},
|
|
{
|
|
"epoch": 2.691701680672269,
|
|
"grad_norm": 8.946855070567134,
|
|
"learning_rate": 3.194395379503451e-07,
|
|
"loss": 0.18062157928943634,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 2.6922268907563023,
|
|
"grad_norm": 8.171326689808147,
|
|
"learning_rate": 3.1836560316810263e-07,
|
|
"loss": 0.4740408658981323,
|
|
"step": 5126
|
|
},
|
|
{
|
|
"epoch": 2.692752100840336,
|
|
"grad_norm": 11.085258568472408,
|
|
"learning_rate": 3.172934173100689e-07,
|
|
"loss": 0.4420720934867859,
|
|
"step": 5127
|
|
},
|
|
{
|
|
"epoch": 2.69327731092437,
|
|
"grad_norm": 11.835224430789292,
|
|
"learning_rate": 3.162229807767808e-07,
|
|
"loss": 0.45101088285446167,
|
|
"step": 5128
|
|
},
|
|
{
|
|
"epoch": 2.6938025210084033,
|
|
"grad_norm": 11.639413644216262,
|
|
"learning_rate": 3.151542939681235e-07,
|
|
"loss": 0.17761468887329102,
|
|
"step": 5129
|
|
},
|
|
{
|
|
"epoch": 2.6943277310924367,
|
|
"grad_norm": 17.31427173957996,
|
|
"learning_rate": 3.140873572833275e-07,
|
|
"loss": 0.713176965713501,
|
|
"step": 5130
|
|
},
|
|
{
|
|
"epoch": 2.6948529411764706,
|
|
"grad_norm": 11.838748608092535,
|
|
"learning_rate": 3.1302217112096855e-07,
|
|
"loss": 0.6659767031669617,
|
|
"step": 5131
|
|
},
|
|
{
|
|
"epoch": 2.6953781512605044,
|
|
"grad_norm": 14.292981371162789,
|
|
"learning_rate": 3.119587358789711e-07,
|
|
"loss": 0.6102906465530396,
|
|
"step": 5132
|
|
},
|
|
{
|
|
"epoch": 2.695903361344538,
|
|
"grad_norm": 7.170690939536522,
|
|
"learning_rate": 3.108970519546034e-07,
|
|
"loss": 0.4029874801635742,
|
|
"step": 5133
|
|
},
|
|
{
|
|
"epoch": 2.696428571428571,
|
|
"grad_norm": 11.454931920897556,
|
|
"learning_rate": 3.098371197444794e-07,
|
|
"loss": 0.3053671717643738,
|
|
"step": 5134
|
|
},
|
|
{
|
|
"epoch": 2.696953781512605,
|
|
"grad_norm": 11.947430182963497,
|
|
"learning_rate": 3.0877893964456116e-07,
|
|
"loss": 0.3585653603076935,
|
|
"step": 5135
|
|
},
|
|
{
|
|
"epoch": 2.697478991596639,
|
|
"grad_norm": 7.936443136705915,
|
|
"learning_rate": 3.0772251205015315e-07,
|
|
"loss": 0.4208843410015106,
|
|
"step": 5136
|
|
},
|
|
{
|
|
"epoch": 2.6980042016806722,
|
|
"grad_norm": 7.205033921601751,
|
|
"learning_rate": 3.066678373559062e-07,
|
|
"loss": 0.32238075137138367,
|
|
"step": 5137
|
|
},
|
|
{
|
|
"epoch": 2.6985294117647056,
|
|
"grad_norm": 13.779233308588191,
|
|
"learning_rate": 3.0561491595581695e-07,
|
|
"loss": 0.7137563228607178,
|
|
"step": 5138
|
|
},
|
|
{
|
|
"epoch": 2.6990546218487395,
|
|
"grad_norm": 12.03154113537296,
|
|
"learning_rate": 3.0456374824322674e-07,
|
|
"loss": 0.41850051283836365,
|
|
"step": 5139
|
|
},
|
|
{
|
|
"epoch": 2.6995798319327733,
|
|
"grad_norm": 16.939026850849142,
|
|
"learning_rate": 3.03514334610821e-07,
|
|
"loss": 0.6328059434890747,
|
|
"step": 5140
|
|
},
|
|
{
|
|
"epoch": 2.7001050420168067,
|
|
"grad_norm": 14.51801961228705,
|
|
"learning_rate": 3.0246667545063057e-07,
|
|
"loss": 0.2627220153808594,
|
|
"step": 5141
|
|
},
|
|
{
|
|
"epoch": 2.70063025210084,
|
|
"grad_norm": 11.531479972898897,
|
|
"learning_rate": 3.014207711540318e-07,
|
|
"loss": 0.8954662680625916,
|
|
"step": 5142
|
|
},
|
|
{
|
|
"epoch": 2.701155462184874,
|
|
"grad_norm": 20.985896647812933,
|
|
"learning_rate": 3.0037662211174437e-07,
|
|
"loss": 0.5788215398788452,
|
|
"step": 5143
|
|
},
|
|
{
|
|
"epoch": 2.7016806722689077,
|
|
"grad_norm": 7.5724057917125585,
|
|
"learning_rate": 2.993342287138312e-07,
|
|
"loss": 0.6538835167884827,
|
|
"step": 5144
|
|
},
|
|
{
|
|
"epoch": 2.702205882352941,
|
|
"grad_norm": 8.327473652430706,
|
|
"learning_rate": 2.9829359134970206e-07,
|
|
"loss": 0.49965575337409973,
|
|
"step": 5145
|
|
},
|
|
{
|
|
"epoch": 2.7027310924369745,
|
|
"grad_norm": 7.523605616397045,
|
|
"learning_rate": 2.972547104081081e-07,
|
|
"loss": 0.33146965503692627,
|
|
"step": 5146
|
|
},
|
|
{
|
|
"epoch": 2.7032563025210083,
|
|
"grad_norm": 9.267993831346974,
|
|
"learning_rate": 2.962175862771455e-07,
|
|
"loss": 0.4421359896659851,
|
|
"step": 5147
|
|
},
|
|
{
|
|
"epoch": 2.703781512605042,
|
|
"grad_norm": 8.956967198864293,
|
|
"learning_rate": 2.951822193442544e-07,
|
|
"loss": 0.29288220405578613,
|
|
"step": 5148
|
|
},
|
|
{
|
|
"epoch": 2.7043067226890756,
|
|
"grad_norm": 14.8026688753217,
|
|
"learning_rate": 2.9414860999621764e-07,
|
|
"loss": 0.9873912930488586,
|
|
"step": 5149
|
|
},
|
|
{
|
|
"epoch": 2.7048319327731094,
|
|
"grad_norm": 11.676850508780802,
|
|
"learning_rate": 2.9311675861916246e-07,
|
|
"loss": 0.43603038787841797,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 2.705357142857143,
|
|
"grad_norm": 6.7432563989320355,
|
|
"learning_rate": 2.920866655985588e-07,
|
|
"loss": 0.12715210020542145,
|
|
"step": 5151
|
|
},
|
|
{
|
|
"epoch": 2.7058823529411766,
|
|
"grad_norm": 9.044957894735301,
|
|
"learning_rate": 2.9105833131922056e-07,
|
|
"loss": 0.2916662096977234,
|
|
"step": 5152
|
|
},
|
|
{
|
|
"epoch": 2.70640756302521,
|
|
"grad_norm": 14.286187786290421,
|
|
"learning_rate": 2.9003175616530264e-07,
|
|
"loss": 0.8403812646865845,
|
|
"step": 5153
|
|
},
|
|
{
|
|
"epoch": 2.706932773109244,
|
|
"grad_norm": 13.314369909470404,
|
|
"learning_rate": 2.8900694052030553e-07,
|
|
"loss": 0.362918883562088,
|
|
"step": 5154
|
|
},
|
|
{
|
|
"epoch": 2.7074579831932772,
|
|
"grad_norm": 10.088509037070159,
|
|
"learning_rate": 2.879838847670696e-07,
|
|
"loss": 0.2886349856853485,
|
|
"step": 5155
|
|
},
|
|
{
|
|
"epoch": 2.707983193277311,
|
|
"grad_norm": 12.567570112336634,
|
|
"learning_rate": 2.8696258928777975e-07,
|
|
"loss": 0.446466863155365,
|
|
"step": 5156
|
|
},
|
|
{
|
|
"epoch": 2.7085084033613445,
|
|
"grad_norm": 6.717264869350844,
|
|
"learning_rate": 2.8594305446396245e-07,
|
|
"loss": 0.21716740727424622,
|
|
"step": 5157
|
|
},
|
|
{
|
|
"epoch": 2.7090336134453783,
|
|
"grad_norm": 9.794897398497712,
|
|
"learning_rate": 2.8492528067648575e-07,
|
|
"loss": 0.8401632308959961,
|
|
"step": 5158
|
|
},
|
|
{
|
|
"epoch": 2.7095588235294117,
|
|
"grad_norm": 9.37508491060464,
|
|
"learning_rate": 2.839092683055622e-07,
|
|
"loss": 0.6506957411766052,
|
|
"step": 5159
|
|
},
|
|
{
|
|
"epoch": 2.7100840336134455,
|
|
"grad_norm": 16.041922721308104,
|
|
"learning_rate": 2.828950177307443e-07,
|
|
"loss": 0.6913888454437256,
|
|
"step": 5160
|
|
},
|
|
{
|
|
"epoch": 2.710609243697479,
|
|
"grad_norm": 8.810022606786823,
|
|
"learning_rate": 2.818825293309274e-07,
|
|
"loss": 0.34516096115112305,
|
|
"step": 5161
|
|
},
|
|
{
|
|
"epoch": 2.7111344537815127,
|
|
"grad_norm": 10.626186409807241,
|
|
"learning_rate": 2.808718034843472e-07,
|
|
"loss": 0.4685274362564087,
|
|
"step": 5162
|
|
},
|
|
{
|
|
"epoch": 2.711659663865546,
|
|
"grad_norm": 17.02547421741878,
|
|
"learning_rate": 2.798628405685827e-07,
|
|
"loss": 0.3441670536994934,
|
|
"step": 5163
|
|
},
|
|
{
|
|
"epoch": 2.71218487394958,
|
|
"grad_norm": 8.73713214975575,
|
|
"learning_rate": 2.7885564096055305e-07,
|
|
"loss": 0.2538045048713684,
|
|
"step": 5164
|
|
},
|
|
{
|
|
"epoch": 2.7127100840336134,
|
|
"grad_norm": 15.018737948467326,
|
|
"learning_rate": 2.7785020503651783e-07,
|
|
"loss": 0.5275735855102539,
|
|
"step": 5165
|
|
},
|
|
{
|
|
"epoch": 2.713235294117647,
|
|
"grad_norm": 7.8716962330650615,
|
|
"learning_rate": 2.7684653317208154e-07,
|
|
"loss": 0.5633281469345093,
|
|
"step": 5166
|
|
},
|
|
{
|
|
"epoch": 2.7137605042016806,
|
|
"grad_norm": 12.214703795854728,
|
|
"learning_rate": 2.7584462574218595e-07,
|
|
"loss": 0.6210681200027466,
|
|
"step": 5167
|
|
},
|
|
{
|
|
"epoch": 2.7142857142857144,
|
|
"grad_norm": 11.077590971720717,
|
|
"learning_rate": 2.74844483121115e-07,
|
|
"loss": 0.45682603120803833,
|
|
"step": 5168
|
|
},
|
|
{
|
|
"epoch": 2.714810924369748,
|
|
"grad_norm": 7.188246699120523,
|
|
"learning_rate": 2.7384610568249313e-07,
|
|
"loss": 0.24761930108070374,
|
|
"step": 5169
|
|
},
|
|
{
|
|
"epoch": 2.7153361344537816,
|
|
"grad_norm": 13.962531163836607,
|
|
"learning_rate": 2.7284949379928535e-07,
|
|
"loss": 0.4918213486671448,
|
|
"step": 5170
|
|
},
|
|
{
|
|
"epoch": 2.715861344537815,
|
|
"grad_norm": 9.93389399552238,
|
|
"learning_rate": 2.7185464784379777e-07,
|
|
"loss": 0.680639922618866,
|
|
"step": 5171
|
|
},
|
|
{
|
|
"epoch": 2.716386554621849,
|
|
"grad_norm": 15.257188440144379,
|
|
"learning_rate": 2.708615681876747e-07,
|
|
"loss": 0.588678777217865,
|
|
"step": 5172
|
|
},
|
|
{
|
|
"epoch": 2.7169117647058822,
|
|
"grad_norm": 9.996652705020658,
|
|
"learning_rate": 2.698702552019045e-07,
|
|
"loss": 0.16154924035072327,
|
|
"step": 5173
|
|
},
|
|
{
|
|
"epoch": 2.717436974789916,
|
|
"grad_norm": 15.809024926042696,
|
|
"learning_rate": 2.6888070925681244e-07,
|
|
"loss": 0.5694648027420044,
|
|
"step": 5174
|
|
},
|
|
{
|
|
"epoch": 2.7179621848739495,
|
|
"grad_norm": 12.544449454952185,
|
|
"learning_rate": 2.67892930722064e-07,
|
|
"loss": 0.6989417672157288,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 2.7184873949579833,
|
|
"grad_norm": 9.30951304822431,
|
|
"learning_rate": 2.669069199666652e-07,
|
|
"loss": 0.5404279828071594,
|
|
"step": 5176
|
|
},
|
|
{
|
|
"epoch": 2.7190126050420167,
|
|
"grad_norm": 11.562908665845434,
|
|
"learning_rate": 2.659226773589607e-07,
|
|
"loss": 0.9708887338638306,
|
|
"step": 5177
|
|
},
|
|
{
|
|
"epoch": 2.7195378151260505,
|
|
"grad_norm": 11.028694871376349,
|
|
"learning_rate": 2.649402032666365e-07,
|
|
"loss": 0.5706590414047241,
|
|
"step": 5178
|
|
},
|
|
{
|
|
"epoch": 2.720063025210084,
|
|
"grad_norm": 16.18872235961291,
|
|
"learning_rate": 2.639594980567162e-07,
|
|
"loss": 0.9086883664131165,
|
|
"step": 5179
|
|
},
|
|
{
|
|
"epoch": 2.7205882352941178,
|
|
"grad_norm": 10.588572533047328,
|
|
"learning_rate": 2.6298056209556164e-07,
|
|
"loss": 1.261301040649414,
|
|
"step": 5180
|
|
},
|
|
{
|
|
"epoch": 2.721113445378151,
|
|
"grad_norm": 8.784072350037729,
|
|
"learning_rate": 2.620033957488777e-07,
|
|
"loss": 0.3578791618347168,
|
|
"step": 5181
|
|
},
|
|
{
|
|
"epoch": 2.721638655462185,
|
|
"grad_norm": 9.747894425934968,
|
|
"learning_rate": 2.6102799938170444e-07,
|
|
"loss": 0.265135258436203,
|
|
"step": 5182
|
|
},
|
|
{
|
|
"epoch": 2.7221638655462184,
|
|
"grad_norm": 8.892857907929,
|
|
"learning_rate": 2.6005437335842155e-07,
|
|
"loss": 0.40978115797042847,
|
|
"step": 5183
|
|
},
|
|
{
|
|
"epoch": 2.722689075630252,
|
|
"grad_norm": 12.999338000428267,
|
|
"learning_rate": 2.5908251804274863e-07,
|
|
"loss": 0.6683812737464905,
|
|
"step": 5184
|
|
},
|
|
{
|
|
"epoch": 2.7232142857142856,
|
|
"grad_norm": 11.186695108480528,
|
|
"learning_rate": 2.581124337977425e-07,
|
|
"loss": 0.31849461793899536,
|
|
"step": 5185
|
|
},
|
|
{
|
|
"epoch": 2.7237394957983194,
|
|
"grad_norm": 12.15900271550104,
|
|
"learning_rate": 2.5714412098579886e-07,
|
|
"loss": 0.43230393528938293,
|
|
"step": 5186
|
|
},
|
|
{
|
|
"epoch": 2.724264705882353,
|
|
"grad_norm": 11.42415083456005,
|
|
"learning_rate": 2.5617757996865053e-07,
|
|
"loss": 0.7816082239151001,
|
|
"step": 5187
|
|
},
|
|
{
|
|
"epoch": 2.7247899159663866,
|
|
"grad_norm": 11.839165575978413,
|
|
"learning_rate": 2.552128111073715e-07,
|
|
"loss": 0.458351731300354,
|
|
"step": 5188
|
|
},
|
|
{
|
|
"epoch": 2.72531512605042,
|
|
"grad_norm": 17.6454826719477,
|
|
"learning_rate": 2.542498147623701e-07,
|
|
"loss": 0.9202781319618225,
|
|
"step": 5189
|
|
},
|
|
{
|
|
"epoch": 2.725840336134454,
|
|
"grad_norm": 14.666203901231317,
|
|
"learning_rate": 2.532885912933952e-07,
|
|
"loss": 0.5745277404785156,
|
|
"step": 5190
|
|
},
|
|
{
|
|
"epoch": 2.7263655462184873,
|
|
"grad_norm": 8.854490892871947,
|
|
"learning_rate": 2.523291410595319e-07,
|
|
"loss": 0.5427813529968262,
|
|
"step": 5191
|
|
},
|
|
{
|
|
"epoch": 2.726890756302521,
|
|
"grad_norm": 12.861767210044425,
|
|
"learning_rate": 2.5137146441920277e-07,
|
|
"loss": 0.30206120014190674,
|
|
"step": 5192
|
|
},
|
|
{
|
|
"epoch": 2.7274159663865545,
|
|
"grad_norm": 9.46209596488583,
|
|
"learning_rate": 2.50415561730169e-07,
|
|
"loss": 0.5628501176834106,
|
|
"step": 5193
|
|
},
|
|
{
|
|
"epoch": 2.7279411764705883,
|
|
"grad_norm": 7.937418909307046,
|
|
"learning_rate": 2.4946143334952764e-07,
|
|
"loss": 0.3499003052711487,
|
|
"step": 5194
|
|
},
|
|
{
|
|
"epoch": 2.7284663865546217,
|
|
"grad_norm": 11.452914711023155,
|
|
"learning_rate": 2.485090796337142e-07,
|
|
"loss": 0.5341336727142334,
|
|
"step": 5195
|
|
},
|
|
{
|
|
"epoch": 2.7289915966386555,
|
|
"grad_norm": 11.04327253903026,
|
|
"learning_rate": 2.4755850093850063e-07,
|
|
"loss": 0.5487924218177795,
|
|
"step": 5196
|
|
},
|
|
{
|
|
"epoch": 2.729516806722689,
|
|
"grad_norm": 8.410998913839864,
|
|
"learning_rate": 2.4660969761899576e-07,
|
|
"loss": 0.46378302574157715,
|
|
"step": 5197
|
|
},
|
|
{
|
|
"epoch": 2.7300420168067228,
|
|
"grad_norm": 13.564689895319297,
|
|
"learning_rate": 2.456626700296455e-07,
|
|
"loss": 0.39550554752349854,
|
|
"step": 5198
|
|
},
|
|
{
|
|
"epoch": 2.730567226890756,
|
|
"grad_norm": 11.341228251483367,
|
|
"learning_rate": 2.447174185242324e-07,
|
|
"loss": 0.2185949683189392,
|
|
"step": 5199
|
|
},
|
|
{
|
|
"epoch": 2.73109243697479,
|
|
"grad_norm": 13.165934898963018,
|
|
"learning_rate": 2.437739434558745e-07,
|
|
"loss": 0.5988172292709351,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 2.7316176470588234,
|
|
"grad_norm": 11.223174592460525,
|
|
"learning_rate": 2.428322451770276e-07,
|
|
"loss": 0.35855352878570557,
|
|
"step": 5201
|
|
},
|
|
{
|
|
"epoch": 2.732142857142857,
|
|
"grad_norm": 14.238648883274028,
|
|
"learning_rate": 2.418923240394816e-07,
|
|
"loss": 0.3877594470977783,
|
|
"step": 5202
|
|
},
|
|
{
|
|
"epoch": 2.7326680672268906,
|
|
"grad_norm": 9.531355162077892,
|
|
"learning_rate": 2.409541803943666e-07,
|
|
"loss": 0.29127657413482666,
|
|
"step": 5203
|
|
},
|
|
{
|
|
"epoch": 2.7331932773109244,
|
|
"grad_norm": 8.905301611343187,
|
|
"learning_rate": 2.40017814592145e-07,
|
|
"loss": 0.22414088249206543,
|
|
"step": 5204
|
|
},
|
|
{
|
|
"epoch": 2.733718487394958,
|
|
"grad_norm": 9.08993426406396,
|
|
"learning_rate": 2.3908322698261597e-07,
|
|
"loss": 0.34578025341033936,
|
|
"step": 5205
|
|
},
|
|
{
|
|
"epoch": 2.7342436974789917,
|
|
"grad_norm": 9.60290329750277,
|
|
"learning_rate": 2.3815041791491467e-07,
|
|
"loss": 0.9344683885574341,
|
|
"step": 5206
|
|
},
|
|
{
|
|
"epoch": 2.7347689075630255,
|
|
"grad_norm": 8.536931437238191,
|
|
"learning_rate": 2.3721938773751175e-07,
|
|
"loss": 0.7497357130050659,
|
|
"step": 5207
|
|
},
|
|
{
|
|
"epoch": 2.735294117647059,
|
|
"grad_norm": 12.09791752641407,
|
|
"learning_rate": 2.3629013679821343e-07,
|
|
"loss": 0.244570791721344,
|
|
"step": 5208
|
|
},
|
|
{
|
|
"epoch": 2.7358193277310923,
|
|
"grad_norm": 11.471601434314076,
|
|
"learning_rate": 2.3536266544416043e-07,
|
|
"loss": 0.31481319665908813,
|
|
"step": 5209
|
|
},
|
|
{
|
|
"epoch": 2.736344537815126,
|
|
"grad_norm": 14.472248151194497,
|
|
"learning_rate": 2.3443697402183107e-07,
|
|
"loss": 0.3894531726837158,
|
|
"step": 5210
|
|
},
|
|
{
|
|
"epoch": 2.73686974789916,
|
|
"grad_norm": 7.711367696201472,
|
|
"learning_rate": 2.3351306287703657e-07,
|
|
"loss": 0.3622080981731415,
|
|
"step": 5211
|
|
},
|
|
{
|
|
"epoch": 2.7373949579831933,
|
|
"grad_norm": 13.570556777124034,
|
|
"learning_rate": 2.3259093235492246e-07,
|
|
"loss": 0.6158708333969116,
|
|
"step": 5212
|
|
},
|
|
{
|
|
"epoch": 2.7379201680672267,
|
|
"grad_norm": 11.418961913377924,
|
|
"learning_rate": 2.3167058279997156e-07,
|
|
"loss": 0.2521127462387085,
|
|
"step": 5213
|
|
},
|
|
{
|
|
"epoch": 2.7384453781512605,
|
|
"grad_norm": 9.380688231523322,
|
|
"learning_rate": 2.3075201455599939e-07,
|
|
"loss": 0.1848069131374359,
|
|
"step": 5214
|
|
},
|
|
{
|
|
"epoch": 2.7389705882352944,
|
|
"grad_norm": 15.619700240407587,
|
|
"learning_rate": 2.2983522796615698e-07,
|
|
"loss": 1.0577616691589355,
|
|
"step": 5215
|
|
},
|
|
{
|
|
"epoch": 2.7394957983193278,
|
|
"grad_norm": 15.023539675588399,
|
|
"learning_rate": 2.2892022337292929e-07,
|
|
"loss": 0.4063257575035095,
|
|
"step": 5216
|
|
},
|
|
{
|
|
"epoch": 2.740021008403361,
|
|
"grad_norm": 9.742470715527096,
|
|
"learning_rate": 2.2800700111813456e-07,
|
|
"loss": 0.38715842366218567,
|
|
"step": 5217
|
|
},
|
|
{
|
|
"epoch": 2.740546218487395,
|
|
"grad_norm": 8.154275100350905,
|
|
"learning_rate": 2.2709556154292878e-07,
|
|
"loss": 0.29554781317710876,
|
|
"step": 5218
|
|
},
|
|
{
|
|
"epoch": 2.741071428571429,
|
|
"grad_norm": 15.388981485681432,
|
|
"learning_rate": 2.2618590498779847e-07,
|
|
"loss": 0.3714393973350525,
|
|
"step": 5219
|
|
},
|
|
{
|
|
"epoch": 2.741596638655462,
|
|
"grad_norm": 16.877988790395055,
|
|
"learning_rate": 2.2527803179256512e-07,
|
|
"loss": 0.4998897314071655,
|
|
"step": 5220
|
|
},
|
|
{
|
|
"epoch": 2.7421218487394956,
|
|
"grad_norm": 7.74639740984657,
|
|
"learning_rate": 2.2437194229638415e-07,
|
|
"loss": 0.40450119972229004,
|
|
"step": 5221
|
|
},
|
|
{
|
|
"epoch": 2.7426470588235294,
|
|
"grad_norm": 10.529193464334151,
|
|
"learning_rate": 2.234676368377442e-07,
|
|
"loss": 0.9691871404647827,
|
|
"step": 5222
|
|
},
|
|
{
|
|
"epoch": 2.7431722689075633,
|
|
"grad_norm": 7.858874563657268,
|
|
"learning_rate": 2.2256511575446837e-07,
|
|
"loss": 0.456901490688324,
|
|
"step": 5223
|
|
},
|
|
{
|
|
"epoch": 2.7436974789915967,
|
|
"grad_norm": 12.315463276930483,
|
|
"learning_rate": 2.21664379383712e-07,
|
|
"loss": 1.2641358375549316,
|
|
"step": 5224
|
|
},
|
|
{
|
|
"epoch": 2.74422268907563,
|
|
"grad_norm": 14.952039409073935,
|
|
"learning_rate": 2.2076542806196588e-07,
|
|
"loss": 0.8916432857513428,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 2.744747899159664,
|
|
"grad_norm": 6.647874374561462,
|
|
"learning_rate": 2.1986826212505085e-07,
|
|
"loss": 0.29105767607688904,
|
|
"step": 5226
|
|
},
|
|
{
|
|
"epoch": 2.7452731092436977,
|
|
"grad_norm": 9.494486770843666,
|
|
"learning_rate": 2.1897288190812271e-07,
|
|
"loss": 0.5085248947143555,
|
|
"step": 5227
|
|
},
|
|
{
|
|
"epoch": 2.745798319327731,
|
|
"grad_norm": 12.758002239363936,
|
|
"learning_rate": 2.1807928774567e-07,
|
|
"loss": 0.5324745178222656,
|
|
"step": 5228
|
|
},
|
|
{
|
|
"epoch": 2.7463235294117645,
|
|
"grad_norm": 12.831924534069625,
|
|
"learning_rate": 2.17187479971514e-07,
|
|
"loss": 0.411658376455307,
|
|
"step": 5229
|
|
},
|
|
{
|
|
"epoch": 2.7468487394957983,
|
|
"grad_norm": 10.316201456070786,
|
|
"learning_rate": 2.1629745891880826e-07,
|
|
"loss": 0.4209370017051697,
|
|
"step": 5230
|
|
},
|
|
{
|
|
"epoch": 2.747373949579832,
|
|
"grad_norm": 8.153955420619475,
|
|
"learning_rate": 2.1540922492003902e-07,
|
|
"loss": 0.5144951939582825,
|
|
"step": 5231
|
|
},
|
|
{
|
|
"epoch": 2.7478991596638656,
|
|
"grad_norm": 9.69868090658562,
|
|
"learning_rate": 2.1452277830702473e-07,
|
|
"loss": 0.3466729521751404,
|
|
"step": 5232
|
|
},
|
|
{
|
|
"epoch": 2.748424369747899,
|
|
"grad_norm": 13.490544809205158,
|
|
"learning_rate": 2.136381194109166e-07,
|
|
"loss": 0.26459285616874695,
|
|
"step": 5233
|
|
},
|
|
{
|
|
"epoch": 2.7489495798319328,
|
|
"grad_norm": 12.72382832217057,
|
|
"learning_rate": 2.1275524856219864e-07,
|
|
"loss": 1.7140297889709473,
|
|
"step": 5234
|
|
},
|
|
{
|
|
"epoch": 2.7494747899159666,
|
|
"grad_norm": 20.1977683566574,
|
|
"learning_rate": 2.1187416609068533e-07,
|
|
"loss": 0.7161862254142761,
|
|
"step": 5235
|
|
},
|
|
{
|
|
"epoch": 2.75,
|
|
"grad_norm": 14.163750003239532,
|
|
"learning_rate": 2.1099487232552395e-07,
|
|
"loss": 0.5943667888641357,
|
|
"step": 5236
|
|
},
|
|
{
|
|
"epoch": 2.7505252100840334,
|
|
"grad_norm": 10.008631260743623,
|
|
"learning_rate": 2.1011736759519286e-07,
|
|
"loss": 1.0886059999465942,
|
|
"step": 5237
|
|
},
|
|
{
|
|
"epoch": 2.7510504201680672,
|
|
"grad_norm": 13.046513895398908,
|
|
"learning_rate": 2.0924165222750315e-07,
|
|
"loss": 0.8634684681892395,
|
|
"step": 5238
|
|
},
|
|
{
|
|
"epoch": 2.751575630252101,
|
|
"grad_norm": 12.602009747509273,
|
|
"learning_rate": 2.0836772654959647e-07,
|
|
"loss": 0.5296191573143005,
|
|
"step": 5239
|
|
},
|
|
{
|
|
"epoch": 2.7521008403361344,
|
|
"grad_norm": 11.488068286472648,
|
|
"learning_rate": 2.0749559088794725e-07,
|
|
"loss": 2.070067882537842,
|
|
"step": 5240
|
|
},
|
|
{
|
|
"epoch": 2.752626050420168,
|
|
"grad_norm": 20.517043931977664,
|
|
"learning_rate": 2.0662524556835982e-07,
|
|
"loss": 1.506117820739746,
|
|
"step": 5241
|
|
},
|
|
{
|
|
"epoch": 2.7531512605042017,
|
|
"grad_norm": 14.568706344718933,
|
|
"learning_rate": 2.0575669091597028e-07,
|
|
"loss": 0.39455515146255493,
|
|
"step": 5242
|
|
},
|
|
{
|
|
"epoch": 2.7536764705882355,
|
|
"grad_norm": 10.191662703901843,
|
|
"learning_rate": 2.048899272552457e-07,
|
|
"loss": 0.36395686864852905,
|
|
"step": 5243
|
|
},
|
|
{
|
|
"epoch": 2.754201680672269,
|
|
"grad_norm": 12.527550419845172,
|
|
"learning_rate": 2.0402495490998374e-07,
|
|
"loss": 0.5779542922973633,
|
|
"step": 5244
|
|
},
|
|
{
|
|
"epoch": 2.7547268907563023,
|
|
"grad_norm": 9.700534359568579,
|
|
"learning_rate": 2.0316177420331375e-07,
|
|
"loss": 0.7974497675895691,
|
|
"step": 5245
|
|
},
|
|
{
|
|
"epoch": 2.755252100840336,
|
|
"grad_norm": 13.081380083043447,
|
|
"learning_rate": 2.0230038545769436e-07,
|
|
"loss": 0.44117051362991333,
|
|
"step": 5246
|
|
},
|
|
{
|
|
"epoch": 2.75577731092437,
|
|
"grad_norm": 12.716563259448858,
|
|
"learning_rate": 2.0144078899491715e-07,
|
|
"loss": 0.33425411581993103,
|
|
"step": 5247
|
|
},
|
|
{
|
|
"epoch": 2.7563025210084033,
|
|
"grad_norm": 12.11437755750207,
|
|
"learning_rate": 2.0058298513610185e-07,
|
|
"loss": 0.271504670381546,
|
|
"step": 5248
|
|
},
|
|
{
|
|
"epoch": 2.7568277310924367,
|
|
"grad_norm": 9.949676273396376,
|
|
"learning_rate": 1.997269742016994e-07,
|
|
"loss": 0.4813274145126343,
|
|
"step": 5249
|
|
},
|
|
{
|
|
"epoch": 2.7573529411764706,
|
|
"grad_norm": 7.966684951065601,
|
|
"learning_rate": 1.9887275651149064e-07,
|
|
"loss": 0.5327956676483154,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 2.7578781512605044,
|
|
"grad_norm": 7.366833698245474,
|
|
"learning_rate": 1.980203323845875e-07,
|
|
"loss": 0.28279638290405273,
|
|
"step": 5251
|
|
},
|
|
{
|
|
"epoch": 2.758403361344538,
|
|
"grad_norm": 10.194026938818547,
|
|
"learning_rate": 1.9716970213943032e-07,
|
|
"loss": 0.3261258006095886,
|
|
"step": 5252
|
|
},
|
|
{
|
|
"epoch": 2.758928571428571,
|
|
"grad_norm": 7.540614511619889,
|
|
"learning_rate": 1.9632086609379041e-07,
|
|
"loss": 0.25825923681259155,
|
|
"step": 5253
|
|
},
|
|
{
|
|
"epoch": 2.759453781512605,
|
|
"grad_norm": 10.079179278488615,
|
|
"learning_rate": 1.954738245647686e-07,
|
|
"loss": 0.3405923843383789,
|
|
"step": 5254
|
|
},
|
|
{
|
|
"epoch": 2.759978991596639,
|
|
"grad_norm": 7.474917382793523,
|
|
"learning_rate": 1.9462857786879562e-07,
|
|
"loss": 0.5673338770866394,
|
|
"step": 5255
|
|
},
|
|
{
|
|
"epoch": 2.7605042016806722,
|
|
"grad_norm": 12.08479845850555,
|
|
"learning_rate": 1.9378512632163116e-07,
|
|
"loss": 0.33351564407348633,
|
|
"step": 5256
|
|
},
|
|
{
|
|
"epoch": 2.7610294117647056,
|
|
"grad_norm": 9.985484581599241,
|
|
"learning_rate": 1.929434702383648e-07,
|
|
"loss": 0.5576674342155457,
|
|
"step": 5257
|
|
},
|
|
{
|
|
"epoch": 2.7615546218487395,
|
|
"grad_norm": 13.262001763752773,
|
|
"learning_rate": 1.9210360993341447e-07,
|
|
"loss": 0.3428288698196411,
|
|
"step": 5258
|
|
},
|
|
{
|
|
"epoch": 2.7620798319327733,
|
|
"grad_norm": 8.538119716704557,
|
|
"learning_rate": 1.9126554572052813e-07,
|
|
"loss": 0.3532198667526245,
|
|
"step": 5259
|
|
},
|
|
{
|
|
"epoch": 2.7626050420168067,
|
|
"grad_norm": 10.353891713475264,
|
|
"learning_rate": 1.9042927791278255e-07,
|
|
"loss": 1.021399736404419,
|
|
"step": 5260
|
|
},
|
|
{
|
|
"epoch": 2.76313025210084,
|
|
"grad_norm": 9.682995832088631,
|
|
"learning_rate": 1.895948068225828e-07,
|
|
"loss": 0.21227969229221344,
|
|
"step": 5261
|
|
},
|
|
{
|
|
"epoch": 2.763655462184874,
|
|
"grad_norm": 14.381793813334657,
|
|
"learning_rate": 1.88762132761664e-07,
|
|
"loss": 0.46639224886894226,
|
|
"step": 5262
|
|
},
|
|
{
|
|
"epoch": 2.7641806722689077,
|
|
"grad_norm": 7.763797401316596,
|
|
"learning_rate": 1.879312560410901e-07,
|
|
"loss": 0.4608529806137085,
|
|
"step": 5263
|
|
},
|
|
{
|
|
"epoch": 2.764705882352941,
|
|
"grad_norm": 14.303230361485726,
|
|
"learning_rate": 1.871021769712511e-07,
|
|
"loss": 0.9698854684829712,
|
|
"step": 5264
|
|
},
|
|
{
|
|
"epoch": 2.7652310924369745,
|
|
"grad_norm": 14.76113395344127,
|
|
"learning_rate": 1.862748958618682e-07,
|
|
"loss": 0.31873244047164917,
|
|
"step": 5265
|
|
},
|
|
{
|
|
"epoch": 2.7657563025210083,
|
|
"grad_norm": 15.832060938925425,
|
|
"learning_rate": 1.8544941302198916e-07,
|
|
"loss": 0.2968553304672241,
|
|
"step": 5266
|
|
},
|
|
{
|
|
"epoch": 2.766281512605042,
|
|
"grad_norm": 11.696503201110188,
|
|
"learning_rate": 1.8462572875999117e-07,
|
|
"loss": 0.4104006886482239,
|
|
"step": 5267
|
|
},
|
|
{
|
|
"epoch": 2.7668067226890756,
|
|
"grad_norm": 14.507134113258228,
|
|
"learning_rate": 1.8380384338357925e-07,
|
|
"loss": 0.39367198944091797,
|
|
"step": 5268
|
|
},
|
|
{
|
|
"epoch": 2.7673319327731094,
|
|
"grad_norm": 6.073440345244819,
|
|
"learning_rate": 1.8298375719978501e-07,
|
|
"loss": 0.15749509632587433,
|
|
"step": 5269
|
|
},
|
|
{
|
|
"epoch": 2.767857142857143,
|
|
"grad_norm": 10.618812305534561,
|
|
"learning_rate": 1.8216547051497057e-07,
|
|
"loss": 0.7008739709854126,
|
|
"step": 5270
|
|
},
|
|
{
|
|
"epoch": 2.7683823529411766,
|
|
"grad_norm": 20.67596785777784,
|
|
"learning_rate": 1.8134898363482367e-07,
|
|
"loss": 0.7393758296966553,
|
|
"step": 5271
|
|
},
|
|
{
|
|
"epoch": 2.76890756302521,
|
|
"grad_norm": 12.111080953413962,
|
|
"learning_rate": 1.805342968643603e-07,
|
|
"loss": 0.5815110802650452,
|
|
"step": 5272
|
|
},
|
|
{
|
|
"epoch": 2.769432773109244,
|
|
"grad_norm": 8.515310981885063,
|
|
"learning_rate": 1.797214105079248e-07,
|
|
"loss": 0.23644420504570007,
|
|
"step": 5273
|
|
},
|
|
{
|
|
"epoch": 2.7699579831932772,
|
|
"grad_norm": 10.33491556272188,
|
|
"learning_rate": 1.7891032486918703e-07,
|
|
"loss": 0.50987708568573,
|
|
"step": 5274
|
|
},
|
|
{
|
|
"epoch": 2.770483193277311,
|
|
"grad_norm": 9.630963979161228,
|
|
"learning_rate": 1.7810104025114572e-07,
|
|
"loss": 0.23692744970321655,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 2.7710084033613445,
|
|
"grad_norm": 8.538582729561833,
|
|
"learning_rate": 1.772935569561257e-07,
|
|
"loss": 0.8385730981826782,
|
|
"step": 5276
|
|
},
|
|
{
|
|
"epoch": 2.7715336134453783,
|
|
"grad_norm": 12.083804730648628,
|
|
"learning_rate": 1.7648787528578127e-07,
|
|
"loss": 0.5103262662887573,
|
|
"step": 5277
|
|
},
|
|
{
|
|
"epoch": 2.7720588235294117,
|
|
"grad_norm": 21.93822699151687,
|
|
"learning_rate": 1.7568399554109106e-07,
|
|
"loss": 0.6717256903648376,
|
|
"step": 5278
|
|
},
|
|
{
|
|
"epoch": 2.7725840336134455,
|
|
"grad_norm": 8.93335327779304,
|
|
"learning_rate": 1.74881918022361e-07,
|
|
"loss": 0.5249470472335815,
|
|
"step": 5279
|
|
},
|
|
{
|
|
"epoch": 2.773109243697479,
|
|
"grad_norm": 7.6321937027675055,
|
|
"learning_rate": 1.7408164302922471e-07,
|
|
"loss": 0.2761799693107605,
|
|
"step": 5280
|
|
},
|
|
{
|
|
"epoch": 2.7736344537815127,
|
|
"grad_norm": 5.1413373017231345,
|
|
"learning_rate": 1.732831708606425e-07,
|
|
"loss": 0.13875234127044678,
|
|
"step": 5281
|
|
},
|
|
{
|
|
"epoch": 2.774159663865546,
|
|
"grad_norm": 8.847203043976213,
|
|
"learning_rate": 1.7248650181489913e-07,
|
|
"loss": 0.3915010392665863,
|
|
"step": 5282
|
|
},
|
|
{
|
|
"epoch": 2.77468487394958,
|
|
"grad_norm": 11.03708182958953,
|
|
"learning_rate": 1.7169163618960817e-07,
|
|
"loss": 0.362044095993042,
|
|
"step": 5283
|
|
},
|
|
{
|
|
"epoch": 2.7752100840336134,
|
|
"grad_norm": 13.893155821128941,
|
|
"learning_rate": 1.708985742817093e-07,
|
|
"loss": 0.7443951368331909,
|
|
"step": 5284
|
|
},
|
|
{
|
|
"epoch": 2.775735294117647,
|
|
"grad_norm": 13.37558564170199,
|
|
"learning_rate": 1.7010731638746668e-07,
|
|
"loss": 0.5511894226074219,
|
|
"step": 5285
|
|
},
|
|
{
|
|
"epoch": 2.7762605042016806,
|
|
"grad_norm": 10.838040601191585,
|
|
"learning_rate": 1.6931786280247164e-07,
|
|
"loss": 0.5490012168884277,
|
|
"step": 5286
|
|
},
|
|
{
|
|
"epoch": 2.7767857142857144,
|
|
"grad_norm": 11.709256979236866,
|
|
"learning_rate": 1.6853021382164215e-07,
|
|
"loss": 0.7820290327072144,
|
|
"step": 5287
|
|
},
|
|
{
|
|
"epoch": 2.777310924369748,
|
|
"grad_norm": 14.16090941397965,
|
|
"learning_rate": 1.677443697392206e-07,
|
|
"loss": 0.37508317828178406,
|
|
"step": 5288
|
|
},
|
|
{
|
|
"epoch": 2.7778361344537816,
|
|
"grad_norm": 13.287352811773326,
|
|
"learning_rate": 1.669603308487755e-07,
|
|
"loss": 0.3444094657897949,
|
|
"step": 5289
|
|
},
|
|
{
|
|
"epoch": 2.778361344537815,
|
|
"grad_norm": 10.214412289717455,
|
|
"learning_rate": 1.6617809744320202e-07,
|
|
"loss": 0.529754638671875,
|
|
"step": 5290
|
|
},
|
|
{
|
|
"epoch": 2.778886554621849,
|
|
"grad_norm": 10.049130527937852,
|
|
"learning_rate": 1.6539766981471861e-07,
|
|
"loss": 0.351434588432312,
|
|
"step": 5291
|
|
},
|
|
{
|
|
"epoch": 2.7794117647058822,
|
|
"grad_norm": 8.273984362625566,
|
|
"learning_rate": 1.6461904825487263e-07,
|
|
"loss": 0.2401900738477707,
|
|
"step": 5292
|
|
},
|
|
{
|
|
"epoch": 2.779936974789916,
|
|
"grad_norm": 13.618541858300375,
|
|
"learning_rate": 1.6384223305453417e-07,
|
|
"loss": 0.3075028657913208,
|
|
"step": 5293
|
|
},
|
|
{
|
|
"epoch": 2.7804621848739495,
|
|
"grad_norm": 13.146301169739644,
|
|
"learning_rate": 1.6306722450389834e-07,
|
|
"loss": 0.38644635677337646,
|
|
"step": 5294
|
|
},
|
|
{
|
|
"epoch": 2.7809873949579833,
|
|
"grad_norm": 54.93450039098761,
|
|
"learning_rate": 1.6229402289248686e-07,
|
|
"loss": 0.41556641459465027,
|
|
"step": 5295
|
|
},
|
|
{
|
|
"epoch": 2.7815126050420167,
|
|
"grad_norm": 11.898619469462943,
|
|
"learning_rate": 1.615226285091448e-07,
|
|
"loss": 0.48220962285995483,
|
|
"step": 5296
|
|
},
|
|
{
|
|
"epoch": 2.7820378151260505,
|
|
"grad_norm": 8.805826705739822,
|
|
"learning_rate": 1.6075304164204385e-07,
|
|
"loss": 0.38020676374435425,
|
|
"step": 5297
|
|
},
|
|
{
|
|
"epoch": 2.782563025210084,
|
|
"grad_norm": 16.819100127416643,
|
|
"learning_rate": 1.5998526257867852e-07,
|
|
"loss": 0.4429679214954376,
|
|
"step": 5298
|
|
},
|
|
{
|
|
"epoch": 2.7830882352941178,
|
|
"grad_norm": 9.930819894253196,
|
|
"learning_rate": 1.5921929160587045e-07,
|
|
"loss": 0.24621260166168213,
|
|
"step": 5299
|
|
},
|
|
{
|
|
"epoch": 2.783613445378151,
|
|
"grad_norm": 8.793978379463555,
|
|
"learning_rate": 1.5845512900976355e-07,
|
|
"loss": 0.21653716266155243,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 2.784138655462185,
|
|
"grad_norm": 9.093765452154368,
|
|
"learning_rate": 1.5769277507582725e-07,
|
|
"loss": 0.2279520481824875,
|
|
"step": 5301
|
|
},
|
|
{
|
|
"epoch": 2.7846638655462184,
|
|
"grad_norm": 8.319356182635838,
|
|
"learning_rate": 1.5693223008885482e-07,
|
|
"loss": 0.3648935854434967,
|
|
"step": 5302
|
|
},
|
|
{
|
|
"epoch": 2.785189075630252,
|
|
"grad_norm": 12.361521188510322,
|
|
"learning_rate": 1.5617349433296402e-07,
|
|
"loss": 0.456037312746048,
|
|
"step": 5303
|
|
},
|
|
{
|
|
"epoch": 2.7857142857142856,
|
|
"grad_norm": 8.854668678436887,
|
|
"learning_rate": 1.55416568091597e-07,
|
|
"loss": 0.6153507232666016,
|
|
"step": 5304
|
|
},
|
|
{
|
|
"epoch": 2.7862394957983194,
|
|
"grad_norm": 13.793754105887354,
|
|
"learning_rate": 1.5466145164751977e-07,
|
|
"loss": 0.6217098832130432,
|
|
"step": 5305
|
|
},
|
|
{
|
|
"epoch": 2.786764705882353,
|
|
"grad_norm": 8.201286776594328,
|
|
"learning_rate": 1.539081452828217e-07,
|
|
"loss": 0.24277333915233612,
|
|
"step": 5306
|
|
},
|
|
{
|
|
"epoch": 2.7872899159663866,
|
|
"grad_norm": 9.010046431737942,
|
|
"learning_rate": 1.531566492789166e-07,
|
|
"loss": 0.28896012902259827,
|
|
"step": 5307
|
|
},
|
|
{
|
|
"epoch": 2.78781512605042,
|
|
"grad_norm": 10.78762507105897,
|
|
"learning_rate": 1.5240696391654208e-07,
|
|
"loss": 0.3023233711719513,
|
|
"step": 5308
|
|
},
|
|
{
|
|
"epoch": 2.788340336134454,
|
|
"grad_norm": 17.127626300524348,
|
|
"learning_rate": 1.5165908947575914e-07,
|
|
"loss": 1.1710079908370972,
|
|
"step": 5309
|
|
},
|
|
{
|
|
"epoch": 2.7888655462184873,
|
|
"grad_norm": 14.491777086749252,
|
|
"learning_rate": 1.5091302623595205e-07,
|
|
"loss": 0.7649059295654297,
|
|
"step": 5310
|
|
},
|
|
{
|
|
"epoch": 2.789390756302521,
|
|
"grad_norm": 11.073341365094276,
|
|
"learning_rate": 1.501687744758279e-07,
|
|
"loss": 0.574504017829895,
|
|
"step": 5311
|
|
},
|
|
{
|
|
"epoch": 2.7899159663865545,
|
|
"grad_norm": 10.805373645764925,
|
|
"learning_rate": 1.4942633447341815e-07,
|
|
"loss": 0.738406777381897,
|
|
"step": 5312
|
|
},
|
|
{
|
|
"epoch": 2.7904411764705883,
|
|
"grad_norm": 10.021427179838017,
|
|
"learning_rate": 1.4868570650607816e-07,
|
|
"loss": 0.396742582321167,
|
|
"step": 5313
|
|
},
|
|
{
|
|
"epoch": 2.7909663865546217,
|
|
"grad_norm": 16.291250676264095,
|
|
"learning_rate": 1.4794689085048386e-07,
|
|
"loss": 0.6696195602416992,
|
|
"step": 5314
|
|
},
|
|
{
|
|
"epoch": 2.7914915966386555,
|
|
"grad_norm": 13.070604790935267,
|
|
"learning_rate": 1.4720988778263612e-07,
|
|
"loss": 0.3883357644081116,
|
|
"step": 5315
|
|
},
|
|
{
|
|
"epoch": 2.792016806722689,
|
|
"grad_norm": 10.139241497110453,
|
|
"learning_rate": 1.464746975778586e-07,
|
|
"loss": 0.4829482436180115,
|
|
"step": 5316
|
|
},
|
|
{
|
|
"epoch": 2.7925420168067228,
|
|
"grad_norm": 14.787881451448238,
|
|
"learning_rate": 1.4574132051079658e-07,
|
|
"loss": 0.3221268653869629,
|
|
"step": 5317
|
|
},
|
|
{
|
|
"epoch": 2.793067226890756,
|
|
"grad_norm": 10.175809100063471,
|
|
"learning_rate": 1.450097568554193e-07,
|
|
"loss": 0.3616814911365509,
|
|
"step": 5318
|
|
},
|
|
{
|
|
"epoch": 2.79359243697479,
|
|
"grad_norm": 8.98723246899509,
|
|
"learning_rate": 1.44280006885017e-07,
|
|
"loss": 0.7913529872894287,
|
|
"step": 5319
|
|
},
|
|
{
|
|
"epoch": 2.7941176470588234,
|
|
"grad_norm": 12.738193711962554,
|
|
"learning_rate": 1.4355207087220436e-07,
|
|
"loss": 0.6317701935768127,
|
|
"step": 5320
|
|
},
|
|
{
|
|
"epoch": 2.794642857142857,
|
|
"grad_norm": 10.677582673573733,
|
|
"learning_rate": 1.4282594908891666e-07,
|
|
"loss": 0.3486226797103882,
|
|
"step": 5321
|
|
},
|
|
{
|
|
"epoch": 2.7951680672268906,
|
|
"grad_norm": 16.37361640788061,
|
|
"learning_rate": 1.4210164180641195e-07,
|
|
"loss": 0.5092406272888184,
|
|
"step": 5322
|
|
},
|
|
{
|
|
"epoch": 2.7956932773109244,
|
|
"grad_norm": 13.25979261324815,
|
|
"learning_rate": 1.4137914929527097e-07,
|
|
"loss": 0.46420764923095703,
|
|
"step": 5323
|
|
},
|
|
{
|
|
"epoch": 2.796218487394958,
|
|
"grad_norm": 9.485223593080686,
|
|
"learning_rate": 1.406584718253967e-07,
|
|
"loss": 1.2684170007705688,
|
|
"step": 5324
|
|
},
|
|
{
|
|
"epoch": 2.7967436974789917,
|
|
"grad_norm": 11.07027522648686,
|
|
"learning_rate": 1.3993960966601328e-07,
|
|
"loss": 0.805744469165802,
|
|
"step": 5325
|
|
},
|
|
{
|
|
"epoch": 2.7972689075630255,
|
|
"grad_norm": 6.362335976876811,
|
|
"learning_rate": 1.3922256308566696e-07,
|
|
"loss": 0.23201191425323486,
|
|
"step": 5326
|
|
},
|
|
{
|
|
"epoch": 2.797794117647059,
|
|
"grad_norm": 8.94539050724915,
|
|
"learning_rate": 1.3850733235222512e-07,
|
|
"loss": 0.2961287498474121,
|
|
"step": 5327
|
|
},
|
|
{
|
|
"epoch": 2.7983193277310923,
|
|
"grad_norm": 13.688707017855698,
|
|
"learning_rate": 1.3779391773287854e-07,
|
|
"loss": 0.66350257396698,
|
|
"step": 5328
|
|
},
|
|
{
|
|
"epoch": 2.798844537815126,
|
|
"grad_norm": 9.26589871090719,
|
|
"learning_rate": 1.3708231949413676e-07,
|
|
"loss": 0.6241029500961304,
|
|
"step": 5329
|
|
},
|
|
{
|
|
"epoch": 2.79936974789916,
|
|
"grad_norm": 9.52911476160773,
|
|
"learning_rate": 1.3637253790183435e-07,
|
|
"loss": 0.2913224697113037,
|
|
"step": 5330
|
|
},
|
|
{
|
|
"epoch": 2.7998949579831933,
|
|
"grad_norm": 11.36635584842067,
|
|
"learning_rate": 1.3566457322112425e-07,
|
|
"loss": 0.47725537419319153,
|
|
"step": 5331
|
|
},
|
|
{
|
|
"epoch": 2.8004201680672267,
|
|
"grad_norm": 13.66061270502456,
|
|
"learning_rate": 1.349584257164821e-07,
|
|
"loss": 0.4649675190448761,
|
|
"step": 5332
|
|
},
|
|
{
|
|
"epoch": 2.8009453781512605,
|
|
"grad_norm": 8.293346986204147,
|
|
"learning_rate": 1.342540956517041e-07,
|
|
"loss": 0.4227481782436371,
|
|
"step": 5333
|
|
},
|
|
{
|
|
"epoch": 2.8014705882352944,
|
|
"grad_norm": 9.106751554917912,
|
|
"learning_rate": 1.3355158328990814e-07,
|
|
"loss": 0.49842900037765503,
|
|
"step": 5334
|
|
},
|
|
{
|
|
"epoch": 2.8019957983193278,
|
|
"grad_norm": 11.161237893089197,
|
|
"learning_rate": 1.3285088889353203e-07,
|
|
"loss": 1.0039618015289307,
|
|
"step": 5335
|
|
},
|
|
{
|
|
"epoch": 2.802521008403361,
|
|
"grad_norm": 9.60805238161859,
|
|
"learning_rate": 1.3215201272433585e-07,
|
|
"loss": 0.5710328221321106,
|
|
"step": 5336
|
|
},
|
|
{
|
|
"epoch": 2.803046218487395,
|
|
"grad_norm": 11.718173968294836,
|
|
"learning_rate": 1.3145495504339856e-07,
|
|
"loss": 0.7412339448928833,
|
|
"step": 5337
|
|
},
|
|
{
|
|
"epoch": 2.803571428571429,
|
|
"grad_norm": 8.385559110735308,
|
|
"learning_rate": 1.3075971611112237e-07,
|
|
"loss": 0.37511223554611206,
|
|
"step": 5338
|
|
},
|
|
{
|
|
"epoch": 2.804096638655462,
|
|
"grad_norm": 6.657065325141617,
|
|
"learning_rate": 1.3006629618722733e-07,
|
|
"loss": 0.29596245288848877,
|
|
"step": 5339
|
|
},
|
|
{
|
|
"epoch": 2.8046218487394956,
|
|
"grad_norm": 12.859196836036965,
|
|
"learning_rate": 1.293746955307562e-07,
|
|
"loss": 0.44414791464805603,
|
|
"step": 5340
|
|
},
|
|
{
|
|
"epoch": 2.8051470588235294,
|
|
"grad_norm": 7.28302530916716,
|
|
"learning_rate": 1.2868491440007015e-07,
|
|
"loss": 0.791850209236145,
|
|
"step": 5341
|
|
},
|
|
{
|
|
"epoch": 2.8056722689075633,
|
|
"grad_norm": 10.843610602596723,
|
|
"learning_rate": 1.2799695305285241e-07,
|
|
"loss": 1.0128309726715088,
|
|
"step": 5342
|
|
},
|
|
{
|
|
"epoch": 2.8061974789915967,
|
|
"grad_norm": 9.704900045551488,
|
|
"learning_rate": 1.2731081174610526e-07,
|
|
"loss": 0.8688849210739136,
|
|
"step": 5343
|
|
},
|
|
{
|
|
"epoch": 2.80672268907563,
|
|
"grad_norm": 11.951938602000247,
|
|
"learning_rate": 1.2662649073615084e-07,
|
|
"loss": 0.7026639580726624,
|
|
"step": 5344
|
|
},
|
|
{
|
|
"epoch": 2.807247899159664,
|
|
"grad_norm": 7.8211097217957555,
|
|
"learning_rate": 1.2594399027863302e-07,
|
|
"loss": 0.543343186378479,
|
|
"step": 5345
|
|
},
|
|
{
|
|
"epoch": 2.8077731092436977,
|
|
"grad_norm": 11.22422748506458,
|
|
"learning_rate": 1.2526331062851395e-07,
|
|
"loss": 0.5593937039375305,
|
|
"step": 5346
|
|
},
|
|
{
|
|
"epoch": 2.808298319327731,
|
|
"grad_norm": 24.53637925057311,
|
|
"learning_rate": 1.245844520400752e-07,
|
|
"loss": 0.5738602876663208,
|
|
"step": 5347
|
|
},
|
|
{
|
|
"epoch": 2.8088235294117645,
|
|
"grad_norm": 16.051264213764703,
|
|
"learning_rate": 1.2390741476692003e-07,
|
|
"loss": 0.3799140751361847,
|
|
"step": 5348
|
|
},
|
|
{
|
|
"epoch": 2.8093487394957983,
|
|
"grad_norm": 8.783649743514527,
|
|
"learning_rate": 1.232321990619695e-07,
|
|
"loss": 0.4208924174308777,
|
|
"step": 5349
|
|
},
|
|
{
|
|
"epoch": 2.809873949579832,
|
|
"grad_norm": 15.934727226597506,
|
|
"learning_rate": 1.2255880517746453e-07,
|
|
"loss": 0.3477574586868286,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 2.8103991596638656,
|
|
"grad_norm": 12.036751450871467,
|
|
"learning_rate": 1.2188723336496623e-07,
|
|
"loss": 0.49650102853775024,
|
|
"step": 5351
|
|
},
|
|
{
|
|
"epoch": 2.810924369747899,
|
|
"grad_norm": 10.078649824134489,
|
|
"learning_rate": 1.2121748387535437e-07,
|
|
"loss": 0.6503196954727173,
|
|
"step": 5352
|
|
},
|
|
{
|
|
"epoch": 2.8114495798319328,
|
|
"grad_norm": 17.584087336453408,
|
|
"learning_rate": 1.205495569588283e-07,
|
|
"loss": 0.8900395631790161,
|
|
"step": 5353
|
|
},
|
|
{
|
|
"epoch": 2.8119747899159666,
|
|
"grad_norm": 10.319350930512377,
|
|
"learning_rate": 1.198834528649062e-07,
|
|
"loss": 0.46624845266342163,
|
|
"step": 5354
|
|
},
|
|
{
|
|
"epoch": 2.8125,
|
|
"grad_norm": 11.944373383781771,
|
|
"learning_rate": 1.192191718424257e-07,
|
|
"loss": 0.3012745678424835,
|
|
"step": 5355
|
|
},
|
|
{
|
|
"epoch": 2.8130252100840334,
|
|
"grad_norm": 10.114065064506235,
|
|
"learning_rate": 1.1855671413954272e-07,
|
|
"loss": 0.4149806499481201,
|
|
"step": 5356
|
|
},
|
|
{
|
|
"epoch": 2.8135504201680672,
|
|
"grad_norm": 12.746936015257683,
|
|
"learning_rate": 1.1789608000373209e-07,
|
|
"loss": 0.6580238938331604,
|
|
"step": 5357
|
|
},
|
|
{
|
|
"epoch": 2.814075630252101,
|
|
"grad_norm": 10.278430011703913,
|
|
"learning_rate": 1.1723726968178917e-07,
|
|
"loss": 0.303714394569397,
|
|
"step": 5358
|
|
},
|
|
{
|
|
"epoch": 2.8146008403361344,
|
|
"grad_norm": 45.32337131709893,
|
|
"learning_rate": 1.1658028341982486e-07,
|
|
"loss": 1.570613980293274,
|
|
"step": 5359
|
|
},
|
|
{
|
|
"epoch": 2.815126050420168,
|
|
"grad_norm": 9.59833546372829,
|
|
"learning_rate": 1.1592512146327117e-07,
|
|
"loss": 0.468766987323761,
|
|
"step": 5360
|
|
},
|
|
{
|
|
"epoch": 2.8156512605042017,
|
|
"grad_norm": 12.130684925064983,
|
|
"learning_rate": 1.1527178405687845e-07,
|
|
"loss": 0.3492281138896942,
|
|
"step": 5361
|
|
},
|
|
{
|
|
"epoch": 2.8161764705882355,
|
|
"grad_norm": 10.839829582761553,
|
|
"learning_rate": 1.1462027144471367e-07,
|
|
"loss": 0.3299821615219116,
|
|
"step": 5362
|
|
},
|
|
{
|
|
"epoch": 2.816701680672269,
|
|
"grad_norm": 10.197376718305389,
|
|
"learning_rate": 1.139705838701638e-07,
|
|
"loss": 0.7447125315666199,
|
|
"step": 5363
|
|
},
|
|
{
|
|
"epoch": 2.8172268907563023,
|
|
"grad_norm": 10.416405125397606,
|
|
"learning_rate": 1.133227215759336e-07,
|
|
"loss": 0.7418153285980225,
|
|
"step": 5364
|
|
},
|
|
{
|
|
"epoch": 2.817752100840336,
|
|
"grad_norm": 10.707547744824177,
|
|
"learning_rate": 1.1267668480404559e-07,
|
|
"loss": 0.40967997908592224,
|
|
"step": 5365
|
|
},
|
|
{
|
|
"epoch": 2.81827731092437,
|
|
"grad_norm": 13.873791986477118,
|
|
"learning_rate": 1.1203247379584004e-07,
|
|
"loss": 0.4700956344604492,
|
|
"step": 5366
|
|
},
|
|
{
|
|
"epoch": 2.8188025210084033,
|
|
"grad_norm": 9.250387949417668,
|
|
"learning_rate": 1.1139008879197722e-07,
|
|
"loss": 0.6769203543663025,
|
|
"step": 5367
|
|
},
|
|
{
|
|
"epoch": 2.8193277310924367,
|
|
"grad_norm": 13.165084999423746,
|
|
"learning_rate": 1.1074953003243183e-07,
|
|
"loss": 0.329289972782135,
|
|
"step": 5368
|
|
},
|
|
{
|
|
"epoch": 2.8198529411764706,
|
|
"grad_norm": 10.888653879775097,
|
|
"learning_rate": 1.1011079775649969e-07,
|
|
"loss": 0.7494542598724365,
|
|
"step": 5369
|
|
},
|
|
{
|
|
"epoch": 2.8203781512605044,
|
|
"grad_norm": 7.969576908463429,
|
|
"learning_rate": 1.0947389220279214e-07,
|
|
"loss": 0.863798201084137,
|
|
"step": 5370
|
|
},
|
|
{
|
|
"epoch": 2.820903361344538,
|
|
"grad_norm": 16.69518130169497,
|
|
"learning_rate": 1.0883881360923943e-07,
|
|
"loss": 0.966042160987854,
|
|
"step": 5371
|
|
},
|
|
{
|
|
"epoch": 2.821428571428571,
|
|
"grad_norm": 8.572555688495012,
|
|
"learning_rate": 1.082055622130873e-07,
|
|
"loss": 0.27385351061820984,
|
|
"step": 5372
|
|
},
|
|
{
|
|
"epoch": 2.821953781512605,
|
|
"grad_norm": 10.54310458802142,
|
|
"learning_rate": 1.0757413825090212e-07,
|
|
"loss": 0.5568721294403076,
|
|
"step": 5373
|
|
},
|
|
{
|
|
"epoch": 2.822478991596639,
|
|
"grad_norm": 9.10784506239515,
|
|
"learning_rate": 1.0694454195856408e-07,
|
|
"loss": 0.5777037739753723,
|
|
"step": 5374
|
|
},
|
|
{
|
|
"epoch": 2.8230042016806722,
|
|
"grad_norm": 7.909087522378086,
|
|
"learning_rate": 1.0631677357127335e-07,
|
|
"loss": 0.501948893070221,
|
|
"step": 5375
|
|
},
|
|
{
|
|
"epoch": 2.8235294117647056,
|
|
"grad_norm": 6.986424072356834,
|
|
"learning_rate": 1.0569083332354568e-07,
|
|
"loss": 0.36117836833000183,
|
|
"step": 5376
|
|
},
|
|
{
|
|
"epoch": 2.8240546218487395,
|
|
"grad_norm": 14.008208299228922,
|
|
"learning_rate": 1.0506672144921515e-07,
|
|
"loss": 0.3777201175689697,
|
|
"step": 5377
|
|
},
|
|
{
|
|
"epoch": 2.8245798319327733,
|
|
"grad_norm": 5.040241788861432,
|
|
"learning_rate": 1.0444443818143135e-07,
|
|
"loss": 0.1081673800945282,
|
|
"step": 5378
|
|
},
|
|
{
|
|
"epoch": 2.8251050420168067,
|
|
"grad_norm": 19.53682313756833,
|
|
"learning_rate": 1.0382398375266111e-07,
|
|
"loss": 0.7970004081726074,
|
|
"step": 5379
|
|
},
|
|
{
|
|
"epoch": 2.82563025210084,
|
|
"grad_norm": 8.732013370059734,
|
|
"learning_rate": 1.0320535839468904e-07,
|
|
"loss": 0.21069815754890442,
|
|
"step": 5380
|
|
},
|
|
{
|
|
"epoch": 2.826155462184874,
|
|
"grad_norm": 11.66756751108921,
|
|
"learning_rate": 1.0258856233861524e-07,
|
|
"loss": 0.39165201783180237,
|
|
"step": 5381
|
|
},
|
|
{
|
|
"epoch": 2.8266806722689077,
|
|
"grad_norm": 10.838267364878453,
|
|
"learning_rate": 1.0197359581485821e-07,
|
|
"loss": 0.4563502073287964,
|
|
"step": 5382
|
|
},
|
|
{
|
|
"epoch": 2.827205882352941,
|
|
"grad_norm": 9.686001954281087,
|
|
"learning_rate": 1.0136045905315028e-07,
|
|
"loss": 0.49988648295402527,
|
|
"step": 5383
|
|
},
|
|
{
|
|
"epoch": 2.8277310924369745,
|
|
"grad_norm": 11.290492863035048,
|
|
"learning_rate": 1.0074915228254267e-07,
|
|
"loss": 0.38395699858665466,
|
|
"step": 5384
|
|
},
|
|
{
|
|
"epoch": 2.8282563025210083,
|
|
"grad_norm": 9.097544675573802,
|
|
"learning_rate": 1.0013967573140216e-07,
|
|
"loss": 0.172023743391037,
|
|
"step": 5385
|
|
},
|
|
{
|
|
"epoch": 2.828781512605042,
|
|
"grad_norm": 6.88368428192777,
|
|
"learning_rate": 9.953202962741105e-08,
|
|
"loss": 0.5613949298858643,
|
|
"step": 5386
|
|
},
|
|
{
|
|
"epoch": 2.8293067226890756,
|
|
"grad_norm": 7.980538562005332,
|
|
"learning_rate": 9.892621419756888e-08,
|
|
"loss": 0.42911338806152344,
|
|
"step": 5387
|
|
},
|
|
{
|
|
"epoch": 2.8298319327731094,
|
|
"grad_norm": 9.464845386903423,
|
|
"learning_rate": 9.832222966819015e-08,
|
|
"loss": 0.41949114203453064,
|
|
"step": 5388
|
|
},
|
|
{
|
|
"epoch": 2.830357142857143,
|
|
"grad_norm": 10.902470001210592,
|
|
"learning_rate": 9.77200762649072e-08,
|
|
"loss": 0.6436776518821716,
|
|
"step": 5389
|
|
},
|
|
{
|
|
"epoch": 2.8308823529411766,
|
|
"grad_norm": 12.109360939073499,
|
|
"learning_rate": 9.711975421266673e-08,
|
|
"loss": 0.6845769882202148,
|
|
"step": 5390
|
|
},
|
|
{
|
|
"epoch": 2.83140756302521,
|
|
"grad_norm": 13.991567999585888,
|
|
"learning_rate": 9.652126373573211e-08,
|
|
"loss": 0.743374228477478,
|
|
"step": 5391
|
|
},
|
|
{
|
|
"epoch": 2.831932773109244,
|
|
"grad_norm": 10.004105747890282,
|
|
"learning_rate": 9.592460505768176e-08,
|
|
"loss": 0.50460284948349,
|
|
"step": 5392
|
|
},
|
|
{
|
|
"epoch": 2.8324579831932772,
|
|
"grad_norm": 12.033271998192342,
|
|
"learning_rate": 9.532977840141123e-08,
|
|
"loss": 0.5155885815620422,
|
|
"step": 5393
|
|
},
|
|
{
|
|
"epoch": 2.832983193277311,
|
|
"grad_norm": 11.297583392445413,
|
|
"learning_rate": 9.47367839891289e-08,
|
|
"loss": 0.5681381821632385,
|
|
"step": 5394
|
|
},
|
|
{
|
|
"epoch": 2.8335084033613445,
|
|
"grad_norm": 9.224849387242672,
|
|
"learning_rate": 9.414562204236199e-08,
|
|
"loss": 0.3853095769882202,
|
|
"step": 5395
|
|
},
|
|
{
|
|
"epoch": 2.8340336134453783,
|
|
"grad_norm": 8.496614906852798,
|
|
"learning_rate": 9.355629278195111e-08,
|
|
"loss": 0.7218843102455139,
|
|
"step": 5396
|
|
},
|
|
{
|
|
"epoch": 2.8345588235294117,
|
|
"grad_norm": 12.94308171771513,
|
|
"learning_rate": 9.29687964280529e-08,
|
|
"loss": 0.7495203614234924,
|
|
"step": 5397
|
|
},
|
|
{
|
|
"epoch": 2.8350840336134455,
|
|
"grad_norm": 17.18270599819209,
|
|
"learning_rate": 9.238313320013903e-08,
|
|
"loss": 1.6747816801071167,
|
|
"step": 5398
|
|
},
|
|
{
|
|
"epoch": 2.835609243697479,
|
|
"grad_norm": 14.64959043691776,
|
|
"learning_rate": 9.179930331699615e-08,
|
|
"loss": 0.4574100971221924,
|
|
"step": 5399
|
|
},
|
|
{
|
|
"epoch": 2.8361344537815127,
|
|
"grad_norm": 12.818606677492864,
|
|
"learning_rate": 9.121730699672704e-08,
|
|
"loss": 0.5144633650779724,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 2.836659663865546,
|
|
"grad_norm": 15.213620111633261,
|
|
"learning_rate": 9.063714445674776e-08,
|
|
"loss": 0.37657615542411804,
|
|
"step": 5401
|
|
},
|
|
{
|
|
"epoch": 2.83718487394958,
|
|
"grad_norm": 12.468471476033974,
|
|
"learning_rate": 9.005881591379161e-08,
|
|
"loss": 1.0478260517120361,
|
|
"step": 5402
|
|
},
|
|
{
|
|
"epoch": 2.8377100840336134,
|
|
"grad_norm": 20.589081134747186,
|
|
"learning_rate": 8.948232158390468e-08,
|
|
"loss": 0.8107958436012268,
|
|
"step": 5403
|
|
},
|
|
{
|
|
"epoch": 2.838235294117647,
|
|
"grad_norm": 7.59970019977053,
|
|
"learning_rate": 8.890766168244913e-08,
|
|
"loss": 0.3058919608592987,
|
|
"step": 5404
|
|
},
|
|
{
|
|
"epoch": 2.8387605042016806,
|
|
"grad_norm": 8.897228334750066,
|
|
"learning_rate": 8.833483642410101e-08,
|
|
"loss": 0.29552382230758667,
|
|
"step": 5405
|
|
},
|
|
{
|
|
"epoch": 2.8392857142857144,
|
|
"grad_norm": 11.976288543042422,
|
|
"learning_rate": 8.776384602285193e-08,
|
|
"loss": 0.36509275436401367,
|
|
"step": 5406
|
|
},
|
|
{
|
|
"epoch": 2.839810924369748,
|
|
"grad_norm": 12.87114970838843,
|
|
"learning_rate": 8.719469069200737e-08,
|
|
"loss": 0.9010012149810791,
|
|
"step": 5407
|
|
},
|
|
{
|
|
"epoch": 2.8403361344537816,
|
|
"grad_norm": 12.447854522581904,
|
|
"learning_rate": 8.662737064418725e-08,
|
|
"loss": 0.514291524887085,
|
|
"step": 5408
|
|
},
|
|
{
|
|
"epoch": 2.840861344537815,
|
|
"grad_norm": 11.513357376603228,
|
|
"learning_rate": 8.606188609132593e-08,
|
|
"loss": 0.38690242171287537,
|
|
"step": 5409
|
|
},
|
|
{
|
|
"epoch": 2.841386554621849,
|
|
"grad_norm": 12.003519814690758,
|
|
"learning_rate": 8.54982372446722e-08,
|
|
"loss": 0.8508661985397339,
|
|
"step": 5410
|
|
},
|
|
{
|
|
"epoch": 2.8419117647058822,
|
|
"grad_norm": 7.7949565701068755,
|
|
"learning_rate": 8.493642431478877e-08,
|
|
"loss": 0.28931260108947754,
|
|
"step": 5411
|
|
},
|
|
{
|
|
"epoch": 2.842436974789916,
|
|
"grad_norm": 12.18276386266004,
|
|
"learning_rate": 8.437644751155383e-08,
|
|
"loss": 0.5548318028450012,
|
|
"step": 5412
|
|
},
|
|
{
|
|
"epoch": 2.8429621848739495,
|
|
"grad_norm": 13.121820859416536,
|
|
"learning_rate": 8.381830704415839e-08,
|
|
"loss": 0.43945053219795227,
|
|
"step": 5413
|
|
},
|
|
{
|
|
"epoch": 2.8434873949579833,
|
|
"grad_norm": 13.670072708652555,
|
|
"learning_rate": 8.326200312110732e-08,
|
|
"loss": 0.46232089400291443,
|
|
"step": 5414
|
|
},
|
|
{
|
|
"epoch": 2.8440126050420167,
|
|
"grad_norm": 11.601618264853066,
|
|
"learning_rate": 8.270753595021941e-08,
|
|
"loss": 0.2802753448486328,
|
|
"step": 5415
|
|
},
|
|
{
|
|
"epoch": 2.8445378151260505,
|
|
"grad_norm": 14.069716355246122,
|
|
"learning_rate": 8.215490573862838e-08,
|
|
"loss": 0.681830644607544,
|
|
"step": 5416
|
|
},
|
|
{
|
|
"epoch": 2.845063025210084,
|
|
"grad_norm": 9.981005871385653,
|
|
"learning_rate": 8.160411269278079e-08,
|
|
"loss": 0.30614691972732544,
|
|
"step": 5417
|
|
},
|
|
{
|
|
"epoch": 2.8455882352941178,
|
|
"grad_norm": 17.493708242652872,
|
|
"learning_rate": 8.105515701843703e-08,
|
|
"loss": 0.3944821357727051,
|
|
"step": 5418
|
|
},
|
|
{
|
|
"epoch": 2.846113445378151,
|
|
"grad_norm": 22.31584618961918,
|
|
"learning_rate": 8.050803892067139e-08,
|
|
"loss": 0.5468156337738037,
|
|
"step": 5419
|
|
},
|
|
{
|
|
"epoch": 2.846638655462185,
|
|
"grad_norm": 6.983503197041113,
|
|
"learning_rate": 7.996275860387149e-08,
|
|
"loss": 0.2810583710670471,
|
|
"step": 5420
|
|
},
|
|
{
|
|
"epoch": 2.8471638655462184,
|
|
"grad_norm": 8.057975557851535,
|
|
"learning_rate": 7.941931627173827e-08,
|
|
"loss": 0.6226363182067871,
|
|
"step": 5421
|
|
},
|
|
{
|
|
"epoch": 2.847689075630252,
|
|
"grad_norm": 8.462203546292969,
|
|
"learning_rate": 7.887771212728601e-08,
|
|
"loss": 0.3900071680545807,
|
|
"step": 5422
|
|
},
|
|
{
|
|
"epoch": 2.8482142857142856,
|
|
"grad_norm": 13.5572522209641,
|
|
"learning_rate": 7.833794637284232e-08,
|
|
"loss": 0.3240922689437866,
|
|
"step": 5423
|
|
},
|
|
{
|
|
"epoch": 2.8487394957983194,
|
|
"grad_norm": 11.616897923443245,
|
|
"learning_rate": 7.780001921004864e-08,
|
|
"loss": 0.34255099296569824,
|
|
"step": 5424
|
|
},
|
|
{
|
|
"epoch": 2.849264705882353,
|
|
"grad_norm": 9.4279151169318,
|
|
"learning_rate": 7.726393083985929e-08,
|
|
"loss": 0.9534785747528076,
|
|
"step": 5425
|
|
},
|
|
{
|
|
"epoch": 2.8497899159663866,
|
|
"grad_norm": 9.868687622826036,
|
|
"learning_rate": 7.672968146254068e-08,
|
|
"loss": 0.27143174409866333,
|
|
"step": 5426
|
|
},
|
|
{
|
|
"epoch": 2.85031512605042,
|
|
"grad_norm": 11.477046303047926,
|
|
"learning_rate": 7.61972712776743e-08,
|
|
"loss": 0.3544968366622925,
|
|
"step": 5427
|
|
},
|
|
{
|
|
"epoch": 2.850840336134454,
|
|
"grad_norm": 8.630011008442724,
|
|
"learning_rate": 7.566670048415214e-08,
|
|
"loss": 0.2991662621498108,
|
|
"step": 5428
|
|
},
|
|
{
|
|
"epoch": 2.8513655462184873,
|
|
"grad_norm": 9.050442791986411,
|
|
"learning_rate": 7.513796928018069e-08,
|
|
"loss": 0.6704224348068237,
|
|
"step": 5429
|
|
},
|
|
{
|
|
"epoch": 2.851890756302521,
|
|
"grad_norm": 8.472622961682655,
|
|
"learning_rate": 7.46110778632786e-08,
|
|
"loss": 0.2958106994628906,
|
|
"step": 5430
|
|
},
|
|
{
|
|
"epoch": 2.8524159663865545,
|
|
"grad_norm": 10.885641210710723,
|
|
"learning_rate": 7.408602643027729e-08,
|
|
"loss": 0.18826164305210114,
|
|
"step": 5431
|
|
},
|
|
{
|
|
"epoch": 2.8529411764705883,
|
|
"grad_norm": 8.202291202121717,
|
|
"learning_rate": 7.356281517732156e-08,
|
|
"loss": 0.5394679307937622,
|
|
"step": 5432
|
|
},
|
|
{
|
|
"epoch": 2.8534663865546217,
|
|
"grad_norm": 9.099191152457923,
|
|
"learning_rate": 7.30414442998667e-08,
|
|
"loss": 0.22680304944515228,
|
|
"step": 5433
|
|
},
|
|
{
|
|
"epoch": 2.8539915966386555,
|
|
"grad_norm": 9.571015785775163,
|
|
"learning_rate": 7.25219139926836e-08,
|
|
"loss": 0.5800913572311401,
|
|
"step": 5434
|
|
},
|
|
{
|
|
"epoch": 2.854516806722689,
|
|
"grad_norm": 8.220725070899238,
|
|
"learning_rate": 7.200422444985312e-08,
|
|
"loss": 0.22415028512477875,
|
|
"step": 5435
|
|
},
|
|
{
|
|
"epoch": 2.8550420168067228,
|
|
"grad_norm": 15.670997270041683,
|
|
"learning_rate": 7.148837586476887e-08,
|
|
"loss": 0.29121801257133484,
|
|
"step": 5436
|
|
},
|
|
{
|
|
"epoch": 2.855567226890756,
|
|
"grad_norm": 13.481412102366525,
|
|
"learning_rate": 7.097436843013783e-08,
|
|
"loss": 0.40365713834762573,
|
|
"step": 5437
|
|
},
|
|
{
|
|
"epoch": 2.85609243697479,
|
|
"grad_norm": 12.765602714375742,
|
|
"learning_rate": 7.046220233797752e-08,
|
|
"loss": 0.2992285490036011,
|
|
"step": 5438
|
|
},
|
|
{
|
|
"epoch": 2.8566176470588234,
|
|
"grad_norm": 14.3976847124799,
|
|
"learning_rate": 6.995187777961931e-08,
|
|
"loss": 0.7297098636627197,
|
|
"step": 5439
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 9.65972866568365,
|
|
"learning_rate": 6.944339494570517e-08,
|
|
"loss": 0.7718173861503601,
|
|
"step": 5440
|
|
},
|
|
{
|
|
"epoch": 2.8576680672268906,
|
|
"grad_norm": 14.846550129795787,
|
|
"learning_rate": 6.893675402618982e-08,
|
|
"loss": 1.429620385169983,
|
|
"step": 5441
|
|
},
|
|
{
|
|
"epoch": 2.8581932773109244,
|
|
"grad_norm": 9.904965079651344,
|
|
"learning_rate": 6.843195521034018e-08,
|
|
"loss": 0.3568647801876068,
|
|
"step": 5442
|
|
},
|
|
{
|
|
"epoch": 2.858718487394958,
|
|
"grad_norm": 8.705997120983524,
|
|
"learning_rate": 6.792899868673487e-08,
|
|
"loss": 0.4867497384548187,
|
|
"step": 5443
|
|
},
|
|
{
|
|
"epoch": 2.8592436974789917,
|
|
"grad_norm": 8.929695338280712,
|
|
"learning_rate": 6.742788464326245e-08,
|
|
"loss": 0.5628509521484375,
|
|
"step": 5444
|
|
},
|
|
{
|
|
"epoch": 2.8597689075630255,
|
|
"grad_norm": 12.220493455838309,
|
|
"learning_rate": 6.692861326712652e-08,
|
|
"loss": 0.9987137913703918,
|
|
"step": 5445
|
|
},
|
|
{
|
|
"epoch": 2.860294117647059,
|
|
"grad_norm": 12.507261218890108,
|
|
"learning_rate": 6.643118474483956e-08,
|
|
"loss": 0.42365092039108276,
|
|
"step": 5446
|
|
},
|
|
{
|
|
"epoch": 2.8608193277310923,
|
|
"grad_norm": 9.46656640852272,
|
|
"learning_rate": 6.593559926222682e-08,
|
|
"loss": 0.518358588218689,
|
|
"step": 5447
|
|
},
|
|
{
|
|
"epoch": 2.861344537815126,
|
|
"grad_norm": 12.063900217699144,
|
|
"learning_rate": 6.544185700442407e-08,
|
|
"loss": 0.2418670952320099,
|
|
"step": 5448
|
|
},
|
|
{
|
|
"epoch": 2.86186974789916,
|
|
"grad_norm": 14.992423646784777,
|
|
"learning_rate": 6.494995815588101e-08,
|
|
"loss": 0.7602907419204712,
|
|
"step": 5449
|
|
},
|
|
{
|
|
"epoch": 2.8623949579831933,
|
|
"grad_norm": 12.546609917885268,
|
|
"learning_rate": 6.445990290035509e-08,
|
|
"loss": 0.5298495888710022,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 2.8629201680672267,
|
|
"grad_norm": 12.68031433329933,
|
|
"learning_rate": 6.397169142091819e-08,
|
|
"loss": 0.23322808742523193,
|
|
"step": 5451
|
|
},
|
|
{
|
|
"epoch": 2.8634453781512605,
|
|
"grad_norm": 13.685008586017366,
|
|
"learning_rate": 6.34853238999511e-08,
|
|
"loss": 0.4290042519569397,
|
|
"step": 5452
|
|
},
|
|
{
|
|
"epoch": 2.8639705882352944,
|
|
"grad_norm": 9.16347533489729,
|
|
"learning_rate": 6.300080051914792e-08,
|
|
"loss": 0.8632311820983887,
|
|
"step": 5453
|
|
},
|
|
{
|
|
"epoch": 2.8644957983193278,
|
|
"grad_norm": 7.432043220990809,
|
|
"learning_rate": 6.251812145951163e-08,
|
|
"loss": 0.35551148653030396,
|
|
"step": 5454
|
|
},
|
|
{
|
|
"epoch": 2.865021008403361,
|
|
"grad_norm": 16.28827793340928,
|
|
"learning_rate": 6.203728690135691e-08,
|
|
"loss": 0.7173649072647095,
|
|
"step": 5455
|
|
},
|
|
{
|
|
"epoch": 2.865546218487395,
|
|
"grad_norm": 8.500449978960063,
|
|
"learning_rate": 6.15582970243117e-08,
|
|
"loss": 0.46399128437042236,
|
|
"step": 5456
|
|
},
|
|
{
|
|
"epoch": 2.866071428571429,
|
|
"grad_norm": 8.112111415134255,
|
|
"learning_rate": 6.108115200731069e-08,
|
|
"loss": 0.2686334252357483,
|
|
"step": 5457
|
|
},
|
|
{
|
|
"epoch": 2.866596638655462,
|
|
"grad_norm": 11.915682938336953,
|
|
"learning_rate": 6.060585202860291e-08,
|
|
"loss": 0.24891838431358337,
|
|
"step": 5458
|
|
},
|
|
{
|
|
"epoch": 2.8671218487394956,
|
|
"grad_norm": 7.550971866134155,
|
|
"learning_rate": 6.013239726574694e-08,
|
|
"loss": 0.5653460621833801,
|
|
"step": 5459
|
|
},
|
|
{
|
|
"epoch": 2.8676470588235294,
|
|
"grad_norm": 14.66350281455618,
|
|
"learning_rate": 5.96607878956107e-08,
|
|
"loss": 0.3963346779346466,
|
|
"step": 5460
|
|
},
|
|
{
|
|
"epoch": 2.8681722689075633,
|
|
"grad_norm": 19.335434005393587,
|
|
"learning_rate": 5.9191024094374384e-08,
|
|
"loss": 0.42505916953086853,
|
|
"step": 5461
|
|
},
|
|
{
|
|
"epoch": 2.8686974789915967,
|
|
"grad_norm": 8.58848108397314,
|
|
"learning_rate": 5.872310603752873e-08,
|
|
"loss": 0.22156614065170288,
|
|
"step": 5462
|
|
},
|
|
{
|
|
"epoch": 2.86922268907563,
|
|
"grad_norm": 10.815123405405565,
|
|
"learning_rate": 5.825703389987392e-08,
|
|
"loss": 0.31902599334716797,
|
|
"step": 5463
|
|
},
|
|
{
|
|
"epoch": 2.869747899159664,
|
|
"grad_norm": 10.538887002681403,
|
|
"learning_rate": 5.7792807855521774e-08,
|
|
"loss": 0.6373153924942017,
|
|
"step": 5464
|
|
},
|
|
{
|
|
"epoch": 2.8702731092436977,
|
|
"grad_norm": 7.739173010504628,
|
|
"learning_rate": 5.7330428077893575e-08,
|
|
"loss": 0.3954012989997864,
|
|
"step": 5465
|
|
},
|
|
{
|
|
"epoch": 2.870798319327731,
|
|
"grad_norm": 15.16119100932093,
|
|
"learning_rate": 5.68698947397206e-08,
|
|
"loss": 0.7021001577377319,
|
|
"step": 5466
|
|
},
|
|
{
|
|
"epoch": 2.8713235294117645,
|
|
"grad_norm": 10.034576471833478,
|
|
"learning_rate": 5.641120801304523e-08,
|
|
"loss": 0.5146601796150208,
|
|
"step": 5467
|
|
},
|
|
{
|
|
"epoch": 2.8718487394957983,
|
|
"grad_norm": 14.995480459574054,
|
|
"learning_rate": 5.5954368069219834e-08,
|
|
"loss": 0.5189433097839355,
|
|
"step": 5468
|
|
},
|
|
{
|
|
"epoch": 2.872373949579832,
|
|
"grad_norm": 11.433428653053515,
|
|
"learning_rate": 5.5499375078906793e-08,
|
|
"loss": 0.27618587017059326,
|
|
"step": 5469
|
|
},
|
|
{
|
|
"epoch": 2.8728991596638656,
|
|
"grad_norm": 6.885839621084402,
|
|
"learning_rate": 5.504622921207736e-08,
|
|
"loss": 0.28780463337898254,
|
|
"step": 5470
|
|
},
|
|
{
|
|
"epoch": 2.873424369747899,
|
|
"grad_norm": 7.0398569741963115,
|
|
"learning_rate": 5.4594930638015574e-08,
|
|
"loss": 0.3494113087654114,
|
|
"step": 5471
|
|
},
|
|
{
|
|
"epoch": 2.8739495798319328,
|
|
"grad_norm": 10.392317991672112,
|
|
"learning_rate": 5.414547952531213e-08,
|
|
"loss": 0.6539039015769958,
|
|
"step": 5472
|
|
},
|
|
{
|
|
"epoch": 2.8744747899159666,
|
|
"grad_norm": 10.463577463377476,
|
|
"learning_rate": 5.369787604186993e-08,
|
|
"loss": 0.5085176229476929,
|
|
"step": 5473
|
|
},
|
|
{
|
|
"epoch": 2.875,
|
|
"grad_norm": 13.109319236077438,
|
|
"learning_rate": 5.325212035490024e-08,
|
|
"loss": 0.44301432371139526,
|
|
"step": 5474
|
|
},
|
|
{
|
|
"epoch": 2.8755252100840334,
|
|
"grad_norm": 11.177983266236517,
|
|
"learning_rate": 5.2808212630925395e-08,
|
|
"loss": 0.2431657910346985,
|
|
"step": 5475
|
|
},
|
|
{
|
|
"epoch": 2.8760504201680672,
|
|
"grad_norm": 9.547790291300467,
|
|
"learning_rate": 5.236615303577552e-08,
|
|
"loss": 0.19286304712295532,
|
|
"step": 5476
|
|
},
|
|
{
|
|
"epoch": 2.876575630252101,
|
|
"grad_norm": 9.332135530406768,
|
|
"learning_rate": 5.192594173459242e-08,
|
|
"loss": 0.3314986228942871,
|
|
"step": 5477
|
|
},
|
|
{
|
|
"epoch": 2.8771008403361344,
|
|
"grad_norm": 12.756189824241634,
|
|
"learning_rate": 5.148757889182565e-08,
|
|
"loss": 0.3398740887641907,
|
|
"step": 5478
|
|
},
|
|
{
|
|
"epoch": 2.877626050420168,
|
|
"grad_norm": 10.322278129800047,
|
|
"learning_rate": 5.105106467123477e-08,
|
|
"loss": 0.3892437517642975,
|
|
"step": 5479
|
|
},
|
|
{
|
|
"epoch": 2.8781512605042017,
|
|
"grad_norm": 10.429227622362067,
|
|
"learning_rate": 5.06163992358899e-08,
|
|
"loss": 0.23828959465026855,
|
|
"step": 5480
|
|
},
|
|
{
|
|
"epoch": 2.8786764705882355,
|
|
"grad_norm": 12.802806398235582,
|
|
"learning_rate": 5.018358274816892e-08,
|
|
"loss": 0.5334790349006653,
|
|
"step": 5481
|
|
},
|
|
{
|
|
"epoch": 2.879201680672269,
|
|
"grad_norm": 12.025231146183835,
|
|
"learning_rate": 4.975261536975973e-08,
|
|
"loss": 0.22797216475009918,
|
|
"step": 5482
|
|
},
|
|
{
|
|
"epoch": 2.8797268907563023,
|
|
"grad_norm": 15.340707137384607,
|
|
"learning_rate": 4.9323497261659635e-08,
|
|
"loss": 0.4636194109916687,
|
|
"step": 5483
|
|
},
|
|
{
|
|
"epoch": 2.880252100840336,
|
|
"grad_norm": 9.916330054183163,
|
|
"learning_rate": 4.889622858417431e-08,
|
|
"loss": 0.31082162261009216,
|
|
"step": 5484
|
|
},
|
|
{
|
|
"epoch": 2.88077731092437,
|
|
"grad_norm": 12.900591211866876,
|
|
"learning_rate": 4.847080949691996e-08,
|
|
"loss": 0.4545387625694275,
|
|
"step": 5485
|
|
},
|
|
{
|
|
"epoch": 2.8813025210084033,
|
|
"grad_norm": 9.614124620505395,
|
|
"learning_rate": 4.8047240158819456e-08,
|
|
"loss": 0.31297287344932556,
|
|
"step": 5486
|
|
},
|
|
{
|
|
"epoch": 2.8818277310924367,
|
|
"grad_norm": 8.079027376898408,
|
|
"learning_rate": 4.7625520728107885e-08,
|
|
"loss": 0.44994843006134033,
|
|
"step": 5487
|
|
},
|
|
{
|
|
"epoch": 2.8823529411764706,
|
|
"grad_norm": 20.39232014648757,
|
|
"learning_rate": 4.7205651362326467e-08,
|
|
"loss": 0.5275421142578125,
|
|
"step": 5488
|
|
},
|
|
{
|
|
"epoch": 2.8828781512605044,
|
|
"grad_norm": 6.461821347193511,
|
|
"learning_rate": 4.6787632218326385e-08,
|
|
"loss": 0.12535065412521362,
|
|
"step": 5489
|
|
},
|
|
{
|
|
"epoch": 2.883403361344538,
|
|
"grad_norm": 14.803654230875447,
|
|
"learning_rate": 4.637146345226828e-08,
|
|
"loss": 0.2589206099510193,
|
|
"step": 5490
|
|
},
|
|
{
|
|
"epoch": 2.883928571428571,
|
|
"grad_norm": 9.213012065927948,
|
|
"learning_rate": 4.595714521962003e-08,
|
|
"loss": 0.674689531326294,
|
|
"step": 5491
|
|
},
|
|
{
|
|
"epoch": 2.884453781512605,
|
|
"grad_norm": 9.93503370366437,
|
|
"learning_rate": 4.554467767515947e-08,
|
|
"loss": 0.45697227120399475,
|
|
"step": 5492
|
|
},
|
|
{
|
|
"epoch": 2.884978991596639,
|
|
"grad_norm": 10.199226438660155,
|
|
"learning_rate": 4.513406097297224e-08,
|
|
"loss": 1.5334389209747314,
|
|
"step": 5493
|
|
},
|
|
{
|
|
"epoch": 2.8855042016806722,
|
|
"grad_norm": 12.688864967982823,
|
|
"learning_rate": 4.4725295266453414e-08,
|
|
"loss": 0.6125204563140869,
|
|
"step": 5494
|
|
},
|
|
{
|
|
"epoch": 2.8860294117647056,
|
|
"grad_norm": 10.180992691466882,
|
|
"learning_rate": 4.4318380708305854e-08,
|
|
"loss": 0.32915446162223816,
|
|
"step": 5495
|
|
},
|
|
{
|
|
"epoch": 2.8865546218487395,
|
|
"grad_norm": 7.707363991047359,
|
|
"learning_rate": 4.391331745054128e-08,
|
|
"loss": 0.2560163736343384,
|
|
"step": 5496
|
|
},
|
|
{
|
|
"epoch": 2.8870798319327733,
|
|
"grad_norm": 9.08705619682907,
|
|
"learning_rate": 4.351010564447977e-08,
|
|
"loss": 0.3261929750442505,
|
|
"step": 5497
|
|
},
|
|
{
|
|
"epoch": 2.8876050420168067,
|
|
"grad_norm": 16.527528011698962,
|
|
"learning_rate": 4.3108745440749723e-08,
|
|
"loss": 0.6061902642250061,
|
|
"step": 5498
|
|
},
|
|
{
|
|
"epoch": 2.88813025210084,
|
|
"grad_norm": 11.518720452811419,
|
|
"learning_rate": 4.2709236989287305e-08,
|
|
"loss": 1.0793002843856812,
|
|
"step": 5499
|
|
},
|
|
{
|
|
"epoch": 2.888655462184874,
|
|
"grad_norm": 10.518716502044702,
|
|
"learning_rate": 4.231158043933814e-08,
|
|
"loss": 0.44680339097976685,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 2.8891806722689077,
|
|
"grad_norm": 13.78884730075112,
|
|
"learning_rate": 4.1915775939454506e-08,
|
|
"loss": 0.5705138444900513,
|
|
"step": 5501
|
|
},
|
|
{
|
|
"epoch": 2.889705882352941,
|
|
"grad_norm": 11.969615813507975,
|
|
"learning_rate": 4.1521823637498125e-08,
|
|
"loss": 0.2875642776489258,
|
|
"step": 5502
|
|
},
|
|
{
|
|
"epoch": 2.8902310924369745,
|
|
"grad_norm": 9.214314533689762,
|
|
"learning_rate": 4.112972368063794e-08,
|
|
"loss": 0.5676702857017517,
|
|
"step": 5503
|
|
},
|
|
{
|
|
"epoch": 2.8907563025210083,
|
|
"grad_norm": 26.369423471159987,
|
|
"learning_rate": 4.073947621535179e-08,
|
|
"loss": 2.802802324295044,
|
|
"step": 5504
|
|
},
|
|
{
|
|
"epoch": 2.891281512605042,
|
|
"grad_norm": 20.98682174304777,
|
|
"learning_rate": 4.035108138742416e-08,
|
|
"loss": 0.8653183579444885,
|
|
"step": 5505
|
|
},
|
|
{
|
|
"epoch": 2.8918067226890756,
|
|
"grad_norm": 15.171345850810335,
|
|
"learning_rate": 3.996453934194899e-08,
|
|
"loss": 0.5532779097557068,
|
|
"step": 5506
|
|
},
|
|
{
|
|
"epoch": 2.8923319327731094,
|
|
"grad_norm": 18.425444751794625,
|
|
"learning_rate": 3.9579850223326887e-08,
|
|
"loss": 1.120945692062378,
|
|
"step": 5507
|
|
},
|
|
{
|
|
"epoch": 2.892857142857143,
|
|
"grad_norm": 13.827765534186234,
|
|
"learning_rate": 3.9197014175266226e-08,
|
|
"loss": 0.5387973189353943,
|
|
"step": 5508
|
|
},
|
|
{
|
|
"epoch": 2.8933823529411766,
|
|
"grad_norm": 16.027681711092658,
|
|
"learning_rate": 3.881603134078482e-08,
|
|
"loss": 0.5398526191711426,
|
|
"step": 5509
|
|
},
|
|
{
|
|
"epoch": 2.89390756302521,
|
|
"grad_norm": 15.81006173365238,
|
|
"learning_rate": 3.84369018622055e-08,
|
|
"loss": 0.5760936737060547,
|
|
"step": 5510
|
|
},
|
|
{
|
|
"epoch": 2.894432773109244,
|
|
"grad_norm": 7.814529768780656,
|
|
"learning_rate": 3.805962588116108e-08,
|
|
"loss": 0.7528111934661865,
|
|
"step": 5511
|
|
},
|
|
{
|
|
"epoch": 2.8949579831932772,
|
|
"grad_norm": 12.499997463553747,
|
|
"learning_rate": 3.768420353859048e-08,
|
|
"loss": 0.3807353079319,
|
|
"step": 5512
|
|
},
|
|
{
|
|
"epoch": 2.895483193277311,
|
|
"grad_norm": 9.174869443834577,
|
|
"learning_rate": 3.731063497474152e-08,
|
|
"loss": 0.6745905876159668,
|
|
"step": 5513
|
|
},
|
|
{
|
|
"epoch": 2.8960084033613445,
|
|
"grad_norm": 9.461731345377686,
|
|
"learning_rate": 3.693892032916757e-08,
|
|
"loss": 0.42122939229011536,
|
|
"step": 5514
|
|
},
|
|
{
|
|
"epoch": 2.8965336134453783,
|
|
"grad_norm": 10.522417401812165,
|
|
"learning_rate": 3.656905974073144e-08,
|
|
"loss": 0.8252875804901123,
|
|
"step": 5515
|
|
},
|
|
{
|
|
"epoch": 2.8970588235294117,
|
|
"grad_norm": 14.425462061084643,
|
|
"learning_rate": 3.620105334760205e-08,
|
|
"loss": 0.34680360555648804,
|
|
"step": 5516
|
|
},
|
|
{
|
|
"epoch": 2.8975840336134455,
|
|
"grad_norm": 9.202109117314102,
|
|
"learning_rate": 3.583490128725553e-08,
|
|
"loss": 0.6464153528213501,
|
|
"step": 5517
|
|
},
|
|
{
|
|
"epoch": 2.898109243697479,
|
|
"grad_norm": 12.648835037111676,
|
|
"learning_rate": 3.547060369647693e-08,
|
|
"loss": 0.4509446620941162,
|
|
"step": 5518
|
|
},
|
|
{
|
|
"epoch": 2.8986344537815127,
|
|
"grad_norm": 17.193040915204442,
|
|
"learning_rate": 3.510816071135681e-08,
|
|
"loss": 0.6343562602996826,
|
|
"step": 5519
|
|
},
|
|
{
|
|
"epoch": 2.899159663865546,
|
|
"grad_norm": 12.106320234862146,
|
|
"learning_rate": 3.474757246729354e-08,
|
|
"loss": 0.7017316818237305,
|
|
"step": 5520
|
|
},
|
|
{
|
|
"epoch": 2.89968487394958,
|
|
"grad_norm": 7.536342382664705,
|
|
"learning_rate": 3.4388839098992154e-08,
|
|
"loss": 0.6642085909843445,
|
|
"step": 5521
|
|
},
|
|
{
|
|
"epoch": 2.9002100840336134,
|
|
"grad_norm": 13.947203182987222,
|
|
"learning_rate": 3.403196074046544e-08,
|
|
"loss": 0.2990296185016632,
|
|
"step": 5522
|
|
},
|
|
{
|
|
"epoch": 2.900735294117647,
|
|
"grad_norm": 8.862421317089296,
|
|
"learning_rate": 3.3676937525032314e-08,
|
|
"loss": 0.4765544533729553,
|
|
"step": 5523
|
|
},
|
|
{
|
|
"epoch": 2.9012605042016806,
|
|
"grad_norm": 14.64265029931189,
|
|
"learning_rate": 3.3323769585320575e-08,
|
|
"loss": 0.5519246459007263,
|
|
"step": 5524
|
|
},
|
|
{
|
|
"epoch": 2.9017857142857144,
|
|
"grad_norm": 10.032561602461207,
|
|
"learning_rate": 3.2972457053262466e-08,
|
|
"loss": 0.4705575108528137,
|
|
"step": 5525
|
|
},
|
|
{
|
|
"epoch": 2.902310924369748,
|
|
"grad_norm": 11.31221576743726,
|
|
"learning_rate": 3.2623000060099106e-08,
|
|
"loss": 0.8108884692192078,
|
|
"step": 5526
|
|
},
|
|
{
|
|
"epoch": 2.9028361344537816,
|
|
"grad_norm": 8.313214471537519,
|
|
"learning_rate": 3.227539873637664e-08,
|
|
"loss": 0.36999955773353577,
|
|
"step": 5527
|
|
},
|
|
{
|
|
"epoch": 2.903361344537815,
|
|
"grad_norm": 16.498289568601937,
|
|
"learning_rate": 3.192965321195007e-08,
|
|
"loss": 0.9184820652008057,
|
|
"step": 5528
|
|
},
|
|
{
|
|
"epoch": 2.903886554621849,
|
|
"grad_norm": 8.01495543923635,
|
|
"learning_rate": 3.158576361597887e-08,
|
|
"loss": 0.2793254256248474,
|
|
"step": 5529
|
|
},
|
|
{
|
|
"epoch": 2.9044117647058822,
|
|
"grad_norm": 7.314269170981081,
|
|
"learning_rate": 3.124373007693082e-08,
|
|
"loss": 0.5376075506210327,
|
|
"step": 5530
|
|
},
|
|
{
|
|
"epoch": 2.904936974789916,
|
|
"grad_norm": 11.086366691726877,
|
|
"learning_rate": 3.090355272257983e-08,
|
|
"loss": 0.2915005683898926,
|
|
"step": 5531
|
|
},
|
|
{
|
|
"epoch": 2.9054621848739495,
|
|
"grad_norm": 7.423482865502868,
|
|
"learning_rate": 3.0565231680007024e-08,
|
|
"loss": 0.26723939180374146,
|
|
"step": 5532
|
|
},
|
|
{
|
|
"epoch": 2.9059873949579833,
|
|
"grad_norm": 11.329995064799036,
|
|
"learning_rate": 3.022876707559796e-08,
|
|
"loss": 0.3294193148612976,
|
|
"step": 5533
|
|
},
|
|
{
|
|
"epoch": 2.9065126050420167,
|
|
"grad_norm": 8.381833455253949,
|
|
"learning_rate": 2.9894159035047666e-08,
|
|
"loss": 0.5151461958885193,
|
|
"step": 5534
|
|
},
|
|
{
|
|
"epoch": 2.9070378151260505,
|
|
"grad_norm": 13.08703037335673,
|
|
"learning_rate": 2.9561407683355027e-08,
|
|
"loss": 0.9340593218803406,
|
|
"step": 5535
|
|
},
|
|
{
|
|
"epoch": 2.907563025210084,
|
|
"grad_norm": 7.57724074373758,
|
|
"learning_rate": 2.9230513144827277e-08,
|
|
"loss": 0.7225733995437622,
|
|
"step": 5536
|
|
},
|
|
{
|
|
"epoch": 2.9080882352941178,
|
|
"grad_norm": 15.104470484094323,
|
|
"learning_rate": 2.890147554307665e-08,
|
|
"loss": 0.7668702006340027,
|
|
"step": 5537
|
|
},
|
|
{
|
|
"epoch": 2.908613445378151,
|
|
"grad_norm": 12.72051977274319,
|
|
"learning_rate": 2.8574295001021492e-08,
|
|
"loss": 0.3595678508281708,
|
|
"step": 5538
|
|
},
|
|
{
|
|
"epoch": 2.909138655462185,
|
|
"grad_norm": 9.29151576715842,
|
|
"learning_rate": 2.8248971640887913e-08,
|
|
"loss": 0.4152783155441284,
|
|
"step": 5539
|
|
},
|
|
{
|
|
"epoch": 2.9096638655462184,
|
|
"grad_norm": 11.06768050742571,
|
|
"learning_rate": 2.792550558420759e-08,
|
|
"loss": 0.7512202262878418,
|
|
"step": 5540
|
|
},
|
|
{
|
|
"epoch": 2.910189075630252,
|
|
"grad_norm": 10.855614816848565,
|
|
"learning_rate": 2.7603896951817755e-08,
|
|
"loss": 0.6991416811943054,
|
|
"step": 5541
|
|
},
|
|
{
|
|
"epoch": 2.9107142857142856,
|
|
"grad_norm": 9.021255865800349,
|
|
"learning_rate": 2.7284145863861743e-08,
|
|
"loss": 0.797561764717102,
|
|
"step": 5542
|
|
},
|
|
{
|
|
"epoch": 2.9112394957983194,
|
|
"grad_norm": 13.837501672475726,
|
|
"learning_rate": 2.696625243979012e-08,
|
|
"loss": 1.0461021661758423,
|
|
"step": 5543
|
|
},
|
|
{
|
|
"epoch": 2.911764705882353,
|
|
"grad_norm": 7.89782215518247,
|
|
"learning_rate": 2.665021679835844e-08,
|
|
"loss": 0.43711721897125244,
|
|
"step": 5544
|
|
},
|
|
{
|
|
"epoch": 2.9122899159663866,
|
|
"grad_norm": 11.324726518508411,
|
|
"learning_rate": 2.633603905762838e-08,
|
|
"loss": 0.4912574887275696,
|
|
"step": 5545
|
|
},
|
|
{
|
|
"epoch": 2.91281512605042,
|
|
"grad_norm": 20.470491479941607,
|
|
"learning_rate": 2.6023719334967724e-08,
|
|
"loss": 0.48075029253959656,
|
|
"step": 5546
|
|
},
|
|
{
|
|
"epoch": 2.913340336134454,
|
|
"grad_norm": 11.612039587280671,
|
|
"learning_rate": 2.571325774705036e-08,
|
|
"loss": 0.2778392434120178,
|
|
"step": 5547
|
|
},
|
|
{
|
|
"epoch": 2.9138655462184873,
|
|
"grad_norm": 9.99319271774061,
|
|
"learning_rate": 2.5404654409856288e-08,
|
|
"loss": 0.5540282726287842,
|
|
"step": 5548
|
|
},
|
|
{
|
|
"epoch": 2.914390756302521,
|
|
"grad_norm": 10.038184913341135,
|
|
"learning_rate": 2.5097909438669964e-08,
|
|
"loss": 0.6935964226722717,
|
|
"step": 5549
|
|
},
|
|
{
|
|
"epoch": 2.9149159663865545,
|
|
"grad_norm": 23.56634252320344,
|
|
"learning_rate": 2.47930229480825e-08,
|
|
"loss": 0.5073999166488647,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 2.9154411764705883,
|
|
"grad_norm": 9.275570355949101,
|
|
"learning_rate": 2.4489995051991678e-08,
|
|
"loss": 0.4122755229473114,
|
|
"step": 5551
|
|
},
|
|
{
|
|
"epoch": 2.9159663865546217,
|
|
"grad_norm": 10.166117457516405,
|
|
"learning_rate": 2.4188825863599164e-08,
|
|
"loss": 1.0233631134033203,
|
|
"step": 5552
|
|
},
|
|
{
|
|
"epoch": 2.9164915966386555,
|
|
"grad_norm": 10.621888518892153,
|
|
"learning_rate": 2.3889515495413297e-08,
|
|
"loss": 0.3412795960903168,
|
|
"step": 5553
|
|
},
|
|
{
|
|
"epoch": 2.917016806722689,
|
|
"grad_norm": 17.83502737509378,
|
|
"learning_rate": 2.3592064059247967e-08,
|
|
"loss": 0.771084189414978,
|
|
"step": 5554
|
|
},
|
|
{
|
|
"epoch": 2.9175420168067228,
|
|
"grad_norm": 9.974548839878679,
|
|
"learning_rate": 2.329647166622262e-08,
|
|
"loss": 0.46054673194885254,
|
|
"step": 5555
|
|
},
|
|
{
|
|
"epoch": 2.918067226890756,
|
|
"grad_norm": 9.237729916948592,
|
|
"learning_rate": 2.300273842676226e-08,
|
|
"loss": 0.5346497297286987,
|
|
"step": 5556
|
|
},
|
|
{
|
|
"epoch": 2.91859243697479,
|
|
"grad_norm": 9.901566661048781,
|
|
"learning_rate": 2.2710864450596336e-08,
|
|
"loss": 0.9071778059005737,
|
|
"step": 5557
|
|
},
|
|
{
|
|
"epoch": 2.9191176470588234,
|
|
"grad_norm": 15.997355385243313,
|
|
"learning_rate": 2.2420849846761517e-08,
|
|
"loss": 0.5223240256309509,
|
|
"step": 5558
|
|
},
|
|
{
|
|
"epoch": 2.919642857142857,
|
|
"grad_norm": 9.803395559821737,
|
|
"learning_rate": 2.213269472359836e-08,
|
|
"loss": 0.43600529432296753,
|
|
"step": 5559
|
|
},
|
|
{
|
|
"epoch": 2.9201680672268906,
|
|
"grad_norm": 13.010491901735929,
|
|
"learning_rate": 2.1846399188752975e-08,
|
|
"loss": 0.6825572848320007,
|
|
"step": 5560
|
|
},
|
|
{
|
|
"epoch": 2.9206932773109244,
|
|
"grad_norm": 8.188936087268589,
|
|
"learning_rate": 2.1561963349178704e-08,
|
|
"loss": 0.4675530195236206,
|
|
"step": 5561
|
|
},
|
|
{
|
|
"epoch": 2.921218487394958,
|
|
"grad_norm": 10.357185874315604,
|
|
"learning_rate": 2.1279387311131106e-08,
|
|
"loss": 0.3140292763710022,
|
|
"step": 5562
|
|
},
|
|
{
|
|
"epoch": 2.9217436974789917,
|
|
"grad_norm": 20.561503814788562,
|
|
"learning_rate": 2.0998671180172957e-08,
|
|
"loss": 1.3401788473129272,
|
|
"step": 5563
|
|
},
|
|
{
|
|
"epoch": 2.9222689075630255,
|
|
"grad_norm": 10.61845234333747,
|
|
"learning_rate": 2.0719815061172045e-08,
|
|
"loss": 0.2538623809814453,
|
|
"step": 5564
|
|
},
|
|
{
|
|
"epoch": 2.922794117647059,
|
|
"grad_norm": 12.461548877181114,
|
|
"learning_rate": 2.0442819058300588e-08,
|
|
"loss": 0.3584285080432892,
|
|
"step": 5565
|
|
},
|
|
{
|
|
"epoch": 2.9233193277310923,
|
|
"grad_norm": 16.45580997541605,
|
|
"learning_rate": 2.0167683275036376e-08,
|
|
"loss": 0.4364638924598694,
|
|
"step": 5566
|
|
},
|
|
{
|
|
"epoch": 2.923844537815126,
|
|
"grad_norm": 16.462900973950404,
|
|
"learning_rate": 1.9894407814162186e-08,
|
|
"loss": 0.2806379795074463,
|
|
"step": 5567
|
|
},
|
|
{
|
|
"epoch": 2.92436974789916,
|
|
"grad_norm": 9.563375260727465,
|
|
"learning_rate": 1.962299277776636e-08,
|
|
"loss": 0.12565010786056519,
|
|
"step": 5568
|
|
},
|
|
{
|
|
"epoch": 2.9248949579831933,
|
|
"grad_norm": 8.227634681315745,
|
|
"learning_rate": 1.935343826724112e-08,
|
|
"loss": 0.17257870733737946,
|
|
"step": 5569
|
|
},
|
|
{
|
|
"epoch": 2.9254201680672267,
|
|
"grad_norm": 8.187935823789262,
|
|
"learning_rate": 1.908574438328481e-08,
|
|
"loss": 0.37759828567504883,
|
|
"step": 5570
|
|
},
|
|
{
|
|
"epoch": 2.9259453781512605,
|
|
"grad_norm": 7.633462545317691,
|
|
"learning_rate": 1.881991122590021e-08,
|
|
"loss": 0.2629449963569641,
|
|
"step": 5571
|
|
},
|
|
{
|
|
"epoch": 2.9264705882352944,
|
|
"grad_norm": 10.54009413604542,
|
|
"learning_rate": 1.8555938894394554e-08,
|
|
"loss": 0.23956085741519928,
|
|
"step": 5572
|
|
},
|
|
{
|
|
"epoch": 2.9269957983193278,
|
|
"grad_norm": 11.484444966656907,
|
|
"learning_rate": 1.8293827487380623e-08,
|
|
"loss": 0.35607922077178955,
|
|
"step": 5573
|
|
},
|
|
{
|
|
"epoch": 2.927521008403361,
|
|
"grad_norm": 11.831120728173534,
|
|
"learning_rate": 1.8033577102775645e-08,
|
|
"loss": 0.24531540274620056,
|
|
"step": 5574
|
|
},
|
|
{
|
|
"epoch": 2.928046218487395,
|
|
"grad_norm": 8.260889344628955,
|
|
"learning_rate": 1.77751878378013e-08,
|
|
"loss": 0.3835732340812683,
|
|
"step": 5575
|
|
},
|
|
{
|
|
"epoch": 2.928571428571429,
|
|
"grad_norm": 16.696906888139786,
|
|
"learning_rate": 1.7518659788984817e-08,
|
|
"loss": 0.7035274505615234,
|
|
"step": 5576
|
|
},
|
|
{
|
|
"epoch": 2.929096638655462,
|
|
"grad_norm": 18.3334617633976,
|
|
"learning_rate": 1.726399305215787e-08,
|
|
"loss": 0.562627911567688,
|
|
"step": 5577
|
|
},
|
|
{
|
|
"epoch": 2.9296218487394956,
|
|
"grad_norm": 6.776640164261589,
|
|
"learning_rate": 1.701118772245658e-08,
|
|
"loss": 0.1429840475320816,
|
|
"step": 5578
|
|
},
|
|
{
|
|
"epoch": 2.9301470588235294,
|
|
"grad_norm": 6.560455773753327,
|
|
"learning_rate": 1.6760243894321513e-08,
|
|
"loss": 0.44401729106903076,
|
|
"step": 5579
|
|
},
|
|
{
|
|
"epoch": 2.9306722689075633,
|
|
"grad_norm": 13.492556925985618,
|
|
"learning_rate": 1.651116166149769e-08,
|
|
"loss": 1.0103554725646973,
|
|
"step": 5580
|
|
},
|
|
{
|
|
"epoch": 2.9311974789915967,
|
|
"grad_norm": 9.644731701714283,
|
|
"learning_rate": 1.626394111703622e-08,
|
|
"loss": 0.858881950378418,
|
|
"step": 5581
|
|
},
|
|
{
|
|
"epoch": 2.93172268907563,
|
|
"grad_norm": 11.560863470788007,
|
|
"learning_rate": 1.6018582353290456e-08,
|
|
"loss": 0.2774242162704468,
|
|
"step": 5582
|
|
},
|
|
{
|
|
"epoch": 2.932247899159664,
|
|
"grad_norm": 16.204354110885234,
|
|
"learning_rate": 1.577508546191986e-08,
|
|
"loss": 0.41454803943634033,
|
|
"step": 5583
|
|
},
|
|
{
|
|
"epoch": 2.9327731092436977,
|
|
"grad_norm": 10.980428655057459,
|
|
"learning_rate": 1.5533450533888326e-08,
|
|
"loss": 0.714098334312439,
|
|
"step": 5584
|
|
},
|
|
{
|
|
"epoch": 2.933298319327731,
|
|
"grad_norm": 12.8824573940971,
|
|
"learning_rate": 1.5293677659463104e-08,
|
|
"loss": 0.2153351604938507,
|
|
"step": 5585
|
|
},
|
|
{
|
|
"epoch": 2.9338235294117645,
|
|
"grad_norm": 13.104349080337803,
|
|
"learning_rate": 1.505576692821642e-08,
|
|
"loss": 0.5149781703948975,
|
|
"step": 5586
|
|
},
|
|
{
|
|
"epoch": 2.9343487394957983,
|
|
"grad_norm": 11.003157779077517,
|
|
"learning_rate": 1.4819718429024965e-08,
|
|
"loss": 0.25556814670562744,
|
|
"step": 5587
|
|
},
|
|
{
|
|
"epoch": 2.934873949579832,
|
|
"grad_norm": 12.355349444171148,
|
|
"learning_rate": 1.4585532250070423e-08,
|
|
"loss": 0.7575448751449585,
|
|
"step": 5588
|
|
},
|
|
{
|
|
"epoch": 2.9353991596638656,
|
|
"grad_norm": 12.950030139239844,
|
|
"learning_rate": 1.4353208478837256e-08,
|
|
"loss": 0.6556761264801025,
|
|
"step": 5589
|
|
},
|
|
{
|
|
"epoch": 2.935924369747899,
|
|
"grad_norm": 15.112237567499033,
|
|
"learning_rate": 1.4122747202114928e-08,
|
|
"loss": 0.4966975450515747,
|
|
"step": 5590
|
|
},
|
|
{
|
|
"epoch": 2.9364495798319328,
|
|
"grad_norm": 9.247774954551618,
|
|
"learning_rate": 1.3894148505997352e-08,
|
|
"loss": 0.27970755100250244,
|
|
"step": 5591
|
|
},
|
|
{
|
|
"epoch": 2.9369747899159666,
|
|
"grad_norm": 10.28911719953282,
|
|
"learning_rate": 1.3667412475882325e-08,
|
|
"loss": 0.6997286677360535,
|
|
"step": 5592
|
|
},
|
|
{
|
|
"epoch": 2.9375,
|
|
"grad_norm": 9.208707155593626,
|
|
"learning_rate": 1.3442539196472647e-08,
|
|
"loss": 0.590284526348114,
|
|
"step": 5593
|
|
},
|
|
{
|
|
"epoch": 2.9380252100840334,
|
|
"grad_norm": 10.131472094571073,
|
|
"learning_rate": 1.3219528751773348e-08,
|
|
"loss": 0.5730470418930054,
|
|
"step": 5594
|
|
},
|
|
{
|
|
"epoch": 2.9385504201680672,
|
|
"grad_norm": 10.558149205353557,
|
|
"learning_rate": 1.2998381225095557e-08,
|
|
"loss": 0.658598005771637,
|
|
"step": 5595
|
|
},
|
|
{
|
|
"epoch": 2.939075630252101,
|
|
"grad_norm": 9.65076945448155,
|
|
"learning_rate": 1.2779096699053195e-08,
|
|
"loss": 0.531460702419281,
|
|
"step": 5596
|
|
},
|
|
{
|
|
"epoch": 2.9396008403361344,
|
|
"grad_norm": 11.906003911325623,
|
|
"learning_rate": 1.2561675255564621e-08,
|
|
"loss": 0.3952830135822296,
|
|
"step": 5597
|
|
},
|
|
{
|
|
"epoch": 2.940126050420168,
|
|
"grad_norm": 16.26547790120726,
|
|
"learning_rate": 1.2346116975853194e-08,
|
|
"loss": 0.5298194885253906,
|
|
"step": 5598
|
|
},
|
|
{
|
|
"epoch": 2.9406512605042017,
|
|
"grad_norm": 13.037334965116791,
|
|
"learning_rate": 1.213242194044395e-08,
|
|
"loss": 0.3888705372810364,
|
|
"step": 5599
|
|
},
|
|
{
|
|
"epoch": 2.9411764705882355,
|
|
"grad_norm": 8.155687647693032,
|
|
"learning_rate": 1.1920590229168028e-08,
|
|
"loss": 0.2725866734981537,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 2.941701680672269,
|
|
"grad_norm": 7.470703469400965,
|
|
"learning_rate": 1.1710621921159904e-08,
|
|
"loss": 0.4168459177017212,
|
|
"step": 5601
|
|
},
|
|
{
|
|
"epoch": 2.9422268907563023,
|
|
"grad_norm": 14.4781653785187,
|
|
"learning_rate": 1.1502517094856836e-08,
|
|
"loss": 1.1077405214309692,
|
|
"step": 5602
|
|
},
|
|
{
|
|
"epoch": 2.942752100840336,
|
|
"grad_norm": 7.398408819836944,
|
|
"learning_rate": 1.1296275828001635e-08,
|
|
"loss": 0.5583397150039673,
|
|
"step": 5603
|
|
},
|
|
{
|
|
"epoch": 2.94327731092437,
|
|
"grad_norm": 9.566556489167208,
|
|
"learning_rate": 1.1091898197639339e-08,
|
|
"loss": 1.3542029857635498,
|
|
"step": 5604
|
|
},
|
|
{
|
|
"epoch": 2.9438025210084033,
|
|
"grad_norm": 10.694305734078142,
|
|
"learning_rate": 1.0889384280119985e-08,
|
|
"loss": 0.9673854112625122,
|
|
"step": 5605
|
|
},
|
|
{
|
|
"epoch": 2.9443277310924367,
|
|
"grad_norm": 9.651329406855588,
|
|
"learning_rate": 1.0688734151096947e-08,
|
|
"loss": 0.24388881027698517,
|
|
"step": 5606
|
|
},
|
|
{
|
|
"epoch": 2.9448529411764706,
|
|
"grad_norm": 17.481026746232192,
|
|
"learning_rate": 1.048994788552804e-08,
|
|
"loss": 0.45848214626312256,
|
|
"step": 5607
|
|
},
|
|
{
|
|
"epoch": 2.9453781512605044,
|
|
"grad_norm": 14.050724281727012,
|
|
"learning_rate": 1.0293025557672753e-08,
|
|
"loss": 0.38031795620918274,
|
|
"step": 5608
|
|
},
|
|
{
|
|
"epoch": 2.945903361344538,
|
|
"grad_norm": 7.6029368596583655,
|
|
"learning_rate": 1.009796724109613e-08,
|
|
"loss": 0.6128751039505005,
|
|
"step": 5609
|
|
},
|
|
{
|
|
"epoch": 2.946428571428571,
|
|
"grad_norm": 10.19092600123891,
|
|
"learning_rate": 9.904773008667101e-09,
|
|
"loss": 0.2897670865058899,
|
|
"step": 5610
|
|
},
|
|
{
|
|
"epoch": 2.946953781512605,
|
|
"grad_norm": 12.9249108178978,
|
|
"learning_rate": 9.713442932556828e-09,
|
|
"loss": 0.38133394718170166,
|
|
"step": 5611
|
|
},
|
|
{
|
|
"epoch": 2.947478991596639,
|
|
"grad_norm": 8.0924426893087,
|
|
"learning_rate": 9.523977084240354e-09,
|
|
"loss": 0.5820958614349365,
|
|
"step": 5612
|
|
},
|
|
{
|
|
"epoch": 2.9480042016806722,
|
|
"grad_norm": 11.493191714545501,
|
|
"learning_rate": 9.336375534497732e-09,
|
|
"loss": 0.5863905549049377,
|
|
"step": 5613
|
|
},
|
|
{
|
|
"epoch": 2.9485294117647056,
|
|
"grad_norm": 14.327094366456585,
|
|
"learning_rate": 9.150638353410123e-09,
|
|
"loss": 0.7361984252929688,
|
|
"step": 5614
|
|
},
|
|
{
|
|
"epoch": 2.9490546218487395,
|
|
"grad_norm": 11.366103174396544,
|
|
"learning_rate": 8.966765610365357e-09,
|
|
"loss": 0.228278249502182,
|
|
"step": 5615
|
|
},
|
|
{
|
|
"epoch": 2.9495798319327733,
|
|
"grad_norm": 10.527788204520025,
|
|
"learning_rate": 8.784757374051267e-09,
|
|
"loss": 0.6901432871818542,
|
|
"step": 5616
|
|
},
|
|
{
|
|
"epoch": 2.9501050420168067,
|
|
"grad_norm": 11.224789400796311,
|
|
"learning_rate": 8.60461371246235e-09,
|
|
"loss": 0.6597157716751099,
|
|
"step": 5617
|
|
},
|
|
{
|
|
"epoch": 2.95063025210084,
|
|
"grad_norm": 11.013315168173806,
|
|
"learning_rate": 8.426334692893668e-09,
|
|
"loss": 0.4447717070579529,
|
|
"step": 5618
|
|
},
|
|
{
|
|
"epoch": 2.951155462184874,
|
|
"grad_norm": 13.531968632115458,
|
|
"learning_rate": 8.249920381946387e-09,
|
|
"loss": 0.6904777884483337,
|
|
"step": 5619
|
|
},
|
|
{
|
|
"epoch": 2.9516806722689077,
|
|
"grad_norm": 9.454423351371974,
|
|
"learning_rate": 8.075370845523344e-09,
|
|
"loss": 0.5729291439056396,
|
|
"step": 5620
|
|
},
|
|
{
|
|
"epoch": 2.952205882352941,
|
|
"grad_norm": 9.531897908238461,
|
|
"learning_rate": 7.902686148831273e-09,
|
|
"loss": 0.5849839448928833,
|
|
"step": 5621
|
|
},
|
|
{
|
|
"epoch": 2.9527310924369745,
|
|
"grad_norm": 10.234014033685307,
|
|
"learning_rate": 7.731866356380235e-09,
|
|
"loss": 0.4954346716403961,
|
|
"step": 5622
|
|
},
|
|
{
|
|
"epoch": 2.9532563025210083,
|
|
"grad_norm": 11.00023380637929,
|
|
"learning_rate": 7.56291153198363e-09,
|
|
"loss": 0.36600130796432495,
|
|
"step": 5623
|
|
},
|
|
{
|
|
"epoch": 2.953781512605042,
|
|
"grad_norm": 10.833577104485304,
|
|
"learning_rate": 7.395821738758191e-09,
|
|
"loss": 0.45721763372421265,
|
|
"step": 5624
|
|
},
|
|
{
|
|
"epoch": 2.9543067226890756,
|
|
"grad_norm": 11.445569870353463,
|
|
"learning_rate": 7.230597039123433e-09,
|
|
"loss": 0.48243996500968933,
|
|
"step": 5625
|
|
},
|
|
{
|
|
"epoch": 2.9548319327731094,
|
|
"grad_norm": 9.671976783819268,
|
|
"learning_rate": 7.067237494802759e-09,
|
|
"loss": 0.6383078694343567,
|
|
"step": 5626
|
|
},
|
|
{
|
|
"epoch": 2.955357142857143,
|
|
"grad_norm": 10.303071456198102,
|
|
"learning_rate": 6.905743166822909e-09,
|
|
"loss": 0.48624855279922485,
|
|
"step": 5627
|
|
},
|
|
{
|
|
"epoch": 2.9558823529411766,
|
|
"grad_norm": 9.856130065633476,
|
|
"learning_rate": 6.746114115513402e-09,
|
|
"loss": 0.5085325241088867,
|
|
"step": 5628
|
|
},
|
|
{
|
|
"epoch": 2.95640756302521,
|
|
"grad_norm": 11.854247092639392,
|
|
"learning_rate": 6.588350400507093e-09,
|
|
"loss": 0.6341557502746582,
|
|
"step": 5629
|
|
},
|
|
{
|
|
"epoch": 2.956932773109244,
|
|
"grad_norm": 16.474906065428893,
|
|
"learning_rate": 6.432452080739615e-09,
|
|
"loss": 0.5361814498901367,
|
|
"step": 5630
|
|
},
|
|
{
|
|
"epoch": 2.9574579831932772,
|
|
"grad_norm": 7.734640267131578,
|
|
"learning_rate": 6.2784192144504926e-09,
|
|
"loss": 0.17374935746192932,
|
|
"step": 5631
|
|
},
|
|
{
|
|
"epoch": 2.957983193277311,
|
|
"grad_norm": 17.135966116435647,
|
|
"learning_rate": 6.1262518591820305e-09,
|
|
"loss": 1.087705373764038,
|
|
"step": 5632
|
|
},
|
|
{
|
|
"epoch": 2.9585084033613445,
|
|
"grad_norm": 16.91123275408573,
|
|
"learning_rate": 5.975950071779313e-09,
|
|
"loss": 0.7166892290115356,
|
|
"step": 5633
|
|
},
|
|
{
|
|
"epoch": 2.9590336134453783,
|
|
"grad_norm": 10.70550524126326,
|
|
"learning_rate": 5.827513908390759e-09,
|
|
"loss": 0.3969137668609619,
|
|
"step": 5634
|
|
},
|
|
{
|
|
"epoch": 2.9595588235294117,
|
|
"grad_norm": 12.765748763205398,
|
|
"learning_rate": 5.6809434244681215e-09,
|
|
"loss": 0.27330633997917175,
|
|
"step": 5635
|
|
},
|
|
{
|
|
"epoch": 2.9600840336134455,
|
|
"grad_norm": 7.536149360956921,
|
|
"learning_rate": 5.536238674765937e-09,
|
|
"loss": 0.2314409613609314,
|
|
"step": 5636
|
|
},
|
|
{
|
|
"epoch": 2.960609243697479,
|
|
"grad_norm": 12.99447399307754,
|
|
"learning_rate": 5.393399713341518e-09,
|
|
"loss": 0.4921409785747528,
|
|
"step": 5637
|
|
},
|
|
{
|
|
"epoch": 2.9611344537815127,
|
|
"grad_norm": 13.114680784542122,
|
|
"learning_rate": 5.252426593555515e-09,
|
|
"loss": 0.32266703248023987,
|
|
"step": 5638
|
|
},
|
|
{
|
|
"epoch": 2.961659663865546,
|
|
"grad_norm": 10.048323761103624,
|
|
"learning_rate": 5.113319368070801e-09,
|
|
"loss": 0.29325148463249207,
|
|
"step": 5639
|
|
},
|
|
{
|
|
"epoch": 2.96218487394958,
|
|
"grad_norm": 15.986007663448719,
|
|
"learning_rate": 4.976078088855252e-09,
|
|
"loss": 0.45709648728370667,
|
|
"step": 5640
|
|
},
|
|
{
|
|
"epoch": 2.9627100840336134,
|
|
"grad_norm": 11.789635949689533,
|
|
"learning_rate": 4.8407028071773e-09,
|
|
"loss": 0.5876717567443848,
|
|
"step": 5641
|
|
},
|
|
{
|
|
"epoch": 2.963235294117647,
|
|
"grad_norm": 10.165642894319092,
|
|
"learning_rate": 4.70719357360927e-09,
|
|
"loss": 0.46749863028526306,
|
|
"step": 5642
|
|
},
|
|
{
|
|
"epoch": 2.9637605042016806,
|
|
"grad_norm": 8.247655211470287,
|
|
"learning_rate": 4.575550438026266e-09,
|
|
"loss": 0.6239333152770996,
|
|
"step": 5643
|
|
},
|
|
{
|
|
"epoch": 2.9642857142857144,
|
|
"grad_norm": 14.606346455869812,
|
|
"learning_rate": 4.445773449606727e-09,
|
|
"loss": 1.1496306657791138,
|
|
"step": 5644
|
|
},
|
|
{
|
|
"epoch": 2.964810924369748,
|
|
"grad_norm": 21.37063961876002,
|
|
"learning_rate": 4.317862656831873e-09,
|
|
"loss": 0.6101829409599304,
|
|
"step": 5645
|
|
},
|
|
{
|
|
"epoch": 2.9653361344537816,
|
|
"grad_norm": 14.804959858310237,
|
|
"learning_rate": 4.191818107485146e-09,
|
|
"loss": 0.6226564645767212,
|
|
"step": 5646
|
|
},
|
|
{
|
|
"epoch": 2.965861344537815,
|
|
"grad_norm": 11.998054483815745,
|
|
"learning_rate": 4.0676398486527715e-09,
|
|
"loss": 0.385552316904068,
|
|
"step": 5647
|
|
},
|
|
{
|
|
"epoch": 2.966386554621849,
|
|
"grad_norm": 12.225844594671564,
|
|
"learning_rate": 3.9453279267248625e-09,
|
|
"loss": 0.643718957901001,
|
|
"step": 5648
|
|
},
|
|
{
|
|
"epoch": 2.9669117647058822,
|
|
"grad_norm": 12.383298611969805,
|
|
"learning_rate": 3.8248823873932026e-09,
|
|
"loss": 0.4943666160106659,
|
|
"step": 5649
|
|
},
|
|
{
|
|
"epoch": 2.967436974789916,
|
|
"grad_norm": 8.856930762340792,
|
|
"learning_rate": 3.7063032756534666e-09,
|
|
"loss": 0.3606548607349396,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 2.9679621848739495,
|
|
"grad_norm": 12.398796097493587,
|
|
"learning_rate": 3.5895906358024424e-09,
|
|
"loss": 0.24608448147773743,
|
|
"step": 5651
|
|
},
|
|
{
|
|
"epoch": 2.9684873949579833,
|
|
"grad_norm": 13.768671634267214,
|
|
"learning_rate": 3.4747445114413634e-09,
|
|
"loss": 0.4157842993736267,
|
|
"step": 5652
|
|
},
|
|
{
|
|
"epoch": 2.9690126050420167,
|
|
"grad_norm": 9.603321706103921,
|
|
"learning_rate": 3.361764945473134e-09,
|
|
"loss": 0.47171053290367126,
|
|
"step": 5653
|
|
},
|
|
{
|
|
"epoch": 2.9695378151260505,
|
|
"grad_norm": 10.562426921855787,
|
|
"learning_rate": 3.2506519801034363e-09,
|
|
"loss": 0.4859582185745239,
|
|
"step": 5654
|
|
},
|
|
{
|
|
"epoch": 2.970063025210084,
|
|
"grad_norm": 10.185667359707393,
|
|
"learning_rate": 3.14140565684129e-09,
|
|
"loss": 0.7889777421951294,
|
|
"step": 5655
|
|
},
|
|
{
|
|
"epoch": 2.9705882352941178,
|
|
"grad_norm": 13.56718656916088,
|
|
"learning_rate": 3.0340260164979375e-09,
|
|
"loss": 0.34324878454208374,
|
|
"step": 5656
|
|
},
|
|
{
|
|
"epoch": 2.971113445378151,
|
|
"grad_norm": 16.696034099932895,
|
|
"learning_rate": 2.928513099187402e-09,
|
|
"loss": 0.45598477125167847,
|
|
"step": 5657
|
|
},
|
|
{
|
|
"epoch": 2.971638655462185,
|
|
"grad_norm": 6.703788131113769,
|
|
"learning_rate": 2.8248669443253775e-09,
|
|
"loss": 0.4168074131011963,
|
|
"step": 5658
|
|
},
|
|
{
|
|
"epoch": 2.9721638655462184,
|
|
"grad_norm": 9.494975141445275,
|
|
"learning_rate": 2.723087590632556e-09,
|
|
"loss": 0.8817811608314514,
|
|
"step": 5659
|
|
},
|
|
{
|
|
"epoch": 2.972689075630252,
|
|
"grad_norm": 11.273881748031549,
|
|
"learning_rate": 2.623175076130191e-09,
|
|
"loss": 0.4042503833770752,
|
|
"step": 5660
|
|
},
|
|
{
|
|
"epoch": 2.9732142857142856,
|
|
"grad_norm": 9.327964147462112,
|
|
"learning_rate": 2.52512943814176e-09,
|
|
"loss": 0.34631478786468506,
|
|
"step": 5661
|
|
},
|
|
{
|
|
"epoch": 2.9737394957983194,
|
|
"grad_norm": 11.240881291707485,
|
|
"learning_rate": 2.428950713295741e-09,
|
|
"loss": 0.8159435391426086,
|
|
"step": 5662
|
|
},
|
|
{
|
|
"epoch": 2.974264705882353,
|
|
"grad_norm": 16.717701558220515,
|
|
"learning_rate": 2.334638937521172e-09,
|
|
"loss": 0.4300820827484131,
|
|
"step": 5663
|
|
},
|
|
{
|
|
"epoch": 2.9747899159663866,
|
|
"grad_norm": 12.488644629851397,
|
|
"learning_rate": 2.242194146050425e-09,
|
|
"loss": 0.380912184715271,
|
|
"step": 5664
|
|
},
|
|
{
|
|
"epoch": 2.97531512605042,
|
|
"grad_norm": 9.16332819734836,
|
|
"learning_rate": 2.151616373417542e-09,
|
|
"loss": 0.386635422706604,
|
|
"step": 5665
|
|
},
|
|
{
|
|
"epoch": 2.975840336134454,
|
|
"grad_norm": 9.503764539594915,
|
|
"learning_rate": 2.0629056534599014e-09,
|
|
"loss": 0.2878572642803192,
|
|
"step": 5666
|
|
},
|
|
{
|
|
"epoch": 2.9763655462184873,
|
|
"grad_norm": 8.849958471707785,
|
|
"learning_rate": 1.9760620193182144e-09,
|
|
"loss": 0.2314329296350479,
|
|
"step": 5667
|
|
},
|
|
{
|
|
"epoch": 2.976890756302521,
|
|
"grad_norm": 10.523764182521704,
|
|
"learning_rate": 1.891085503433754e-09,
|
|
"loss": 0.6014482378959656,
|
|
"step": 5668
|
|
},
|
|
{
|
|
"epoch": 2.9774159663865545,
|
|
"grad_norm": 12.027695286433763,
|
|
"learning_rate": 1.8079761375522365e-09,
|
|
"loss": 0.4198509156703949,
|
|
"step": 5669
|
|
},
|
|
{
|
|
"epoch": 2.9779411764705883,
|
|
"grad_norm": 12.510137893586682,
|
|
"learning_rate": 1.726733952719939e-09,
|
|
"loss": 0.26352745294570923,
|
|
"step": 5670
|
|
},
|
|
{
|
|
"epoch": 2.9784663865546217,
|
|
"grad_norm": 16.31323441077259,
|
|
"learning_rate": 1.6473589792875832e-09,
|
|
"loss": 0.308188259601593,
|
|
"step": 5671
|
|
},
|
|
{
|
|
"epoch": 2.9789915966386555,
|
|
"grad_norm": 12.730371356484046,
|
|
"learning_rate": 1.569851246906451e-09,
|
|
"loss": 0.33039066195487976,
|
|
"step": 5672
|
|
},
|
|
{
|
|
"epoch": 2.979516806722689,
|
|
"grad_norm": 9.72219997453046,
|
|
"learning_rate": 1.4942107845317132e-09,
|
|
"loss": 0.30831047892570496,
|
|
"step": 5673
|
|
},
|
|
{
|
|
"epoch": 2.9800420168067228,
|
|
"grad_norm": 7.164965305532418,
|
|
"learning_rate": 1.420437620420212e-09,
|
|
"loss": 0.307595819234848,
|
|
"step": 5674
|
|
},
|
|
{
|
|
"epoch": 2.980567226890756,
|
|
"grad_norm": 11.265195686345777,
|
|
"learning_rate": 1.3485317821321231e-09,
|
|
"loss": 0.25833243131637573,
|
|
"step": 5675
|
|
},
|
|
{
|
|
"epoch": 2.98109243697479,
|
|
"grad_norm": 9.455864162725236,
|
|
"learning_rate": 1.2784932965287378e-09,
|
|
"loss": 0.415718138217926,
|
|
"step": 5676
|
|
},
|
|
{
|
|
"epoch": 2.9816176470588234,
|
|
"grad_norm": 16.196923012472972,
|
|
"learning_rate": 1.210322189774682e-09,
|
|
"loss": 0.7805691957473755,
|
|
"step": 5677
|
|
},
|
|
{
|
|
"epoch": 2.982142857142857,
|
|
"grad_norm": 9.290485662290374,
|
|
"learning_rate": 1.1440184873362514e-09,
|
|
"loss": 0.45192593336105347,
|
|
"step": 5678
|
|
},
|
|
{
|
|
"epoch": 2.9826680672268906,
|
|
"grad_norm": 9.314863646095231,
|
|
"learning_rate": 1.079582213983632e-09,
|
|
"loss": 0.7352830767631531,
|
|
"step": 5679
|
|
},
|
|
{
|
|
"epoch": 2.9831932773109244,
|
|
"grad_norm": 10.222129047944414,
|
|
"learning_rate": 1.0170133937875692e-09,
|
|
"loss": 0.43010446429252625,
|
|
"step": 5680
|
|
},
|
|
{
|
|
"epoch": 2.983718487394958,
|
|
"grad_norm": 14.478667435454975,
|
|
"learning_rate": 9.563120501221434e-10,
|
|
"loss": 0.5366514921188354,
|
|
"step": 5681
|
|
},
|
|
{
|
|
"epoch": 2.9842436974789917,
|
|
"grad_norm": 10.527100068397836,
|
|
"learning_rate": 8.9747820566366e-10,
|
|
"loss": 0.7565234303474426,
|
|
"step": 5682
|
|
},
|
|
{
|
|
"epoch": 2.9847689075630255,
|
|
"grad_norm": 14.890558663670724,
|
|
"learning_rate": 8.405118823906489e-10,
|
|
"loss": 0.46974968910217285,
|
|
"step": 5683
|
|
},
|
|
{
|
|
"epoch": 2.985294117647059,
|
|
"grad_norm": 9.425281217033564,
|
|
"learning_rate": 7.854131015844201e-10,
|
|
"loss": 0.3968534469604492,
|
|
"step": 5684
|
|
},
|
|
{
|
|
"epoch": 2.9858193277310923,
|
|
"grad_norm": 14.620206726148263,
|
|
"learning_rate": 7.321818838279537e-10,
|
|
"loss": 0.5368586182594299,
|
|
"step": 5685
|
|
},
|
|
{
|
|
"epoch": 2.986344537815126,
|
|
"grad_norm": 6.6917640984500775,
|
|
"learning_rate": 6.808182490070092e-10,
|
|
"loss": 0.5833422541618347,
|
|
"step": 5686
|
|
},
|
|
{
|
|
"epoch": 2.98686974789916,
|
|
"grad_norm": 13.430856231064407,
|
|
"learning_rate": 6.313222163095711e-10,
|
|
"loss": 0.45203086733818054,
|
|
"step": 5687
|
|
},
|
|
{
|
|
"epoch": 2.9873949579831933,
|
|
"grad_norm": 9.388426030845077,
|
|
"learning_rate": 5.836938042258489e-10,
|
|
"loss": 0.6880434155464172,
|
|
"step": 5688
|
|
},
|
|
{
|
|
"epoch": 2.9879201680672267,
|
|
"grad_norm": 9.866153797945675,
|
|
"learning_rate": 5.379330305488317e-10,
|
|
"loss": 0.3018028736114502,
|
|
"step": 5689
|
|
},
|
|
{
|
|
"epoch": 2.9884453781512605,
|
|
"grad_norm": 10.567725023760596,
|
|
"learning_rate": 4.940399123731787e-10,
|
|
"loss": 0.20725136995315552,
|
|
"step": 5690
|
|
},
|
|
{
|
|
"epoch": 2.9889705882352944,
|
|
"grad_norm": 14.340152025454126,
|
|
"learning_rate": 4.520144660957737e-10,
|
|
"loss": 0.34831321239471436,
|
|
"step": 5691
|
|
},
|
|
{
|
|
"epoch": 2.9894957983193278,
|
|
"grad_norm": 10.140995778667797,
|
|
"learning_rate": 4.118567074168356e-10,
|
|
"loss": 0.3369051218032837,
|
|
"step": 5692
|
|
},
|
|
{
|
|
"epoch": 2.990021008403361,
|
|
"grad_norm": 11.613086347026366,
|
|
"learning_rate": 3.735666513371428e-10,
|
|
"loss": 0.4291839599609375,
|
|
"step": 5693
|
|
},
|
|
{
|
|
"epoch": 2.990546218487395,
|
|
"grad_norm": 8.262672556370198,
|
|
"learning_rate": 3.371443121619189e-10,
|
|
"loss": 0.6771973371505737,
|
|
"step": 5694
|
|
},
|
|
{
|
|
"epoch": 2.991071428571429,
|
|
"grad_norm": 18.988430129138063,
|
|
"learning_rate": 3.0258970349639204e-10,
|
|
"loss": 0.36443549394607544,
|
|
"step": 5695
|
|
},
|
|
{
|
|
"epoch": 2.991596638655462,
|
|
"grad_norm": 10.925737321961899,
|
|
"learning_rate": 2.6990283825023554e-10,
|
|
"loss": 0.9888339042663574,
|
|
"step": 5696
|
|
},
|
|
{
|
|
"epoch": 2.9921218487394956,
|
|
"grad_norm": 15.978284780391945,
|
|
"learning_rate": 2.3908372863368223e-10,
|
|
"loss": 1.6931525468826294,
|
|
"step": 5697
|
|
},
|
|
{
|
|
"epoch": 2.9926470588235294,
|
|
"grad_norm": 12.290472234322491,
|
|
"learning_rate": 2.1013238615974486e-10,
|
|
"loss": 0.5588763356208801,
|
|
"step": 5698
|
|
},
|
|
{
|
|
"epoch": 2.9931722689075633,
|
|
"grad_norm": 11.573922698374671,
|
|
"learning_rate": 1.830488216442161e-10,
|
|
"loss": 1.2311294078826904,
|
|
"step": 5699
|
|
},
|
|
{
|
|
"epoch": 2.9936974789915967,
|
|
"grad_norm": 15.287092679863187,
|
|
"learning_rate": 1.5783304520455844e-10,
|
|
"loss": 0.6111997961997986,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 2.99422268907563,
|
|
"grad_norm": 6.716267768886236,
|
|
"learning_rate": 1.344850662604591e-10,
|
|
"loss": 0.3660454750061035,
|
|
"step": 5701
|
|
},
|
|
{
|
|
"epoch": 2.994747899159664,
|
|
"grad_norm": 10.11498413982614,
|
|
"learning_rate": 1.1300489353438526e-10,
|
|
"loss": 0.3281766176223755,
|
|
"step": 5702
|
|
},
|
|
{
|
|
"epoch": 2.9952731092436977,
|
|
"grad_norm": 10.783934579798286,
|
|
"learning_rate": 9.339253505102896e-11,
|
|
"loss": 0.5336626768112183,
|
|
"step": 5703
|
|
},
|
|
{
|
|
"epoch": 2.995798319327731,
|
|
"grad_norm": 9.245609678239921,
|
|
"learning_rate": 7.564799813619683e-11,
|
|
"loss": 0.2390085905790329,
|
|
"step": 5704
|
|
},
|
|
{
|
|
"epoch": 2.9963235294117645,
|
|
"grad_norm": 8.371824258006043,
|
|
"learning_rate": 5.977128941903055e-11,
|
|
"loss": 0.6434842944145203,
|
|
"step": 5705
|
|
},
|
|
{
|
|
"epoch": 2.9968487394957983,
|
|
"grad_norm": 11.54270914337302,
|
|
"learning_rate": 4.576241483089661e-11,
|
|
"loss": 0.36174070835113525,
|
|
"step": 5706
|
|
},
|
|
{
|
|
"epoch": 2.997373949579832,
|
|
"grad_norm": 8.655617354594044,
|
|
"learning_rate": 3.362137960483125e-11,
|
|
"loss": 0.5723540186882019,
|
|
"step": 5707
|
|
},
|
|
{
|
|
"epoch": 2.9978991596638656,
|
|
"grad_norm": 7.511434574508938,
|
|
"learning_rate": 2.334818827665064e-11,
|
|
"loss": 0.41090860962867737,
|
|
"step": 5708
|
|
},
|
|
{
|
|
"epoch": 2.998424369747899,
|
|
"grad_norm": 10.231341927399738,
|
|
"learning_rate": 1.494284468384066e-11,
|
|
"loss": 0.45292237401008606,
|
|
"step": 5709
|
|
},
|
|
{
|
|
"epoch": 2.9989495798319328,
|
|
"grad_norm": 11.027730083955783,
|
|
"learning_rate": 8.40535196611203e-12,
|
|
"loss": 0.43816691637039185,
|
|
"step": 5710
|
|
},
|
|
{
|
|
"epoch": 2.9994747899159666,
|
|
"grad_norm": 12.07965501739737,
|
|
"learning_rate": 3.735712566510508e-12,
|
|
"loss": 0.38289082050323486,
|
|
"step": 5711
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"grad_norm": 10.016519800861975,
|
|
"learning_rate": 9.339282286413565e-13,
|
|
"loss": 0.3404456675052643,
|
|
"step": 5712
|
|
},
|
|
{
|
|
"epoch": 3.0,
|
|
"step": 5712,
|
|
"total_flos": 15197432340480.0,
|
|
"train_loss": 1.6582482910574172,
|
|
"train_runtime": 6482.0567,
|
|
"train_samples_per_second": 3.524,
|
|
"train_steps_per_second": 0.881
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 5712,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 3,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 15197432340480.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|