659 lines
13 KiB
JSON
659 lines
13 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.9930151338766007,
|
|
"global_step": 214,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 2e-05,
|
|
"loss": 0.8616,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.9995608365087945e-05,
|
|
"loss": 0.7122,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 1.9982437317643218e-05,
|
|
"loss": 0.6609,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 1.996049842615217e-05,
|
|
"loss": 0.6289,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 1.992981096013517e-05,
|
|
"loss": 0.6091,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 1.9890401873221642e-05,
|
|
"loss": 0.5863,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 1.984230577947597e-05,
|
|
"loss": 0.5834,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 1.9785564922995042e-05,
|
|
"loss": 0.5628,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 1.972022914080411e-05,
|
|
"loss": 0.5446,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 1.964635581908359e-05,
|
|
"loss": 0.5434,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 1.9564009842765225e-05,
|
|
"loss": 0.5196,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 1.9473263538541916e-05,
|
|
"loss": 0.5292,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 1.9374196611341212e-05,
|
|
"loss": 0.5139,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 1.9266896074318335e-05,
|
|
"loss": 0.5088,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 1.9151456172430186e-05,
|
|
"loss": 0.5079,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 1.9027978299657436e-05,
|
|
"loss": 0.5086,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 1.8896570909947477e-05,
|
|
"loss": 0.5024,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 1.875734942195637e-05,
|
|
"loss": 0.5046,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 1.8610436117673557e-05,
|
|
"loss": 0.4777,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 1.845596003501826e-05,
|
|
"loss": 0.5006,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 1.829405685450202e-05,
|
|
"loss": 0.4796,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 1.8124868780056814e-05,
|
|
"loss": 0.4701,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 1.7948544414133534e-05,
|
|
"loss": 0.4784,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 1.7765238627180424e-05,
|
|
"loss": 0.4773,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 1.7575112421616203e-05,
|
|
"loss": 0.4783,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 1.7378332790417275e-05,
|
|
"loss": 0.4772,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 1.717507257044331e-05,
|
|
"loss": 0.4671,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 1.6965510290629973e-05,
|
|
"loss": 0.4716,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 1.6749830015182106e-05,
|
|
"loss": 0.4539,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 1.6528221181905217e-05,
|
|
"loss": 0.4613,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 1.6300878435817115e-05,
|
|
"loss": 0.4758,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 1.6068001458185934e-05,
|
|
"loss": 0.4623,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 1.5829794791144723e-05,
|
|
"loss": 0.4734,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 1.5586467658036526e-05,
|
|
"loss": 0.4512,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.533823377964791e-05,
|
|
"loss": 0.4713,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.5085311186492206e-05,
|
|
"loss": 0.4789,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.482792202730745e-05,
|
|
"loss": 0.5663,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.4566292373937133e-05,
|
|
"loss": 0.4551,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.4300652022765207e-05,
|
|
"loss": 0.461,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.4031234292879726e-05,
|
|
"loss": 0.4673,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.3758275821142382e-05,
|
|
"loss": 0.4589,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.348201635434399e-05,
|
|
"loss": 0.4495,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.3202698538628376e-05,
|
|
"loss": 0.4645,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.292056770636976e-05,
|
|
"loss": 0.4555,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 1.2635871660690677e-05,
|
|
"loss": 0.4464,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 1.234886045780984e-05,
|
|
"loss": 0.4646,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 1.2059786187410984e-05,
|
|
"loss": 0.4599,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 1.176890275122573e-05,
|
|
"loss": 0.4534,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 1.1476465640024814e-05,
|
|
"loss": 0.4744,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 1.1182731709213658e-05,
|
|
"loss": 0.4626,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 1.0887958953229349e-05,
|
|
"loss": 0.4407,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.0592406278937143e-05,
|
|
"loss": 0.452,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 1.0296333278225599e-05,
|
|
"loss": 0.4496,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 1.01,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4138,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 1.02,
|
|
"learning_rate": 9.703666721774403e-06,
|
|
"loss": 0.2924,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 1.04,
|
|
"learning_rate": 9.407593721062858e-06,
|
|
"loss": 0.3184,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 1.06,
|
|
"learning_rate": 9.112041046770653e-06,
|
|
"loss": 0.2885,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 1.08,
|
|
"learning_rate": 8.817268290786343e-06,
|
|
"loss": 0.2842,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 1.1,
|
|
"learning_rate": 8.52353435997519e-06,
|
|
"loss": 0.2763,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 1.12,
|
|
"learning_rate": 8.231097248774273e-06,
|
|
"loss": 0.2765,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 1.14,
|
|
"learning_rate": 7.940213812589018e-06,
|
|
"loss": 0.2788,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 1.15,
|
|
"learning_rate": 7.651139542190164e-06,
|
|
"loss": 0.2821,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 1.17,
|
|
"learning_rate": 7.364128339309326e-06,
|
|
"loss": 0.2834,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 1.19,
|
|
"learning_rate": 7.079432293630244e-06,
|
|
"loss": 0.2728,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 1.21,
|
|
"learning_rate": 6.797301461371626e-06,
|
|
"loss": 0.274,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 1.23,
|
|
"learning_rate": 6.517983645656014e-06,
|
|
"loss": 0.276,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"learning_rate": 6.241724178857621e-06,
|
|
"loss": 0.2723,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 1.27,
|
|
"learning_rate": 5.96876570712028e-06,
|
|
"loss": 0.2876,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 1.29,
|
|
"learning_rate": 5.699347977234799e-06,
|
|
"loss": 0.2714,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 1.3,
|
|
"learning_rate": 5.43370762606287e-06,
|
|
"loss": 0.2758,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 1.32,
|
|
"learning_rate": 5.172077972692553e-06,
|
|
"loss": 0.2816,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 1.34,
|
|
"learning_rate": 4.914688813507798e-06,
|
|
"loss": 0.2787,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 1.36,
|
|
"learning_rate": 4.661766220352098e-06,
|
|
"loss": 0.2801,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 1.38,
|
|
"learning_rate": 4.413532341963477e-06,
|
|
"loss": 0.271,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 1.4,
|
|
"learning_rate": 4.170205208855281e-06,
|
|
"loss": 0.2708,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 1.42,
|
|
"learning_rate": 3.931998541814069e-06,
|
|
"loss": 0.2728,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 1.43,
|
|
"learning_rate": 3.6991215641828903e-06,
|
|
"loss": 0.2719,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 1.45,
|
|
"learning_rate": 3.4717788180947855e-06,
|
|
"loss": 0.272,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 1.47,
|
|
"learning_rate": 3.250169984817897e-06,
|
|
"loss": 0.2724,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 3.0344897093700333e-06,
|
|
"loss": 0.2744,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 1.51,
|
|
"learning_rate": 2.8249274295566863e-06,
|
|
"loss": 0.2736,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 1.53,
|
|
"learning_rate": 2.6216672095827267e-06,
|
|
"loss": 0.2681,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 1.55,
|
|
"learning_rate": 2.424887578383799e-06,
|
|
"loss": 0.2703,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 1.56,
|
|
"learning_rate": 2.234761372819577e-06,
|
|
"loss": 0.2649,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 1.58,
|
|
"learning_rate": 2.0514555858664663e-06,
|
|
"loss": 0.2781,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 1.6,
|
|
"learning_rate": 1.875131219943187e-06,
|
|
"loss": 0.2645,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 1.62,
|
|
"learning_rate": 1.7059431454979825e-06,
|
|
"loss": 0.27,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 1.64,
|
|
"learning_rate": 1.5440399649817384e-06,
|
|
"loss": 0.2683,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 1.3895638823264447e-06,
|
|
"loss": 0.2645,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 1.68,
|
|
"learning_rate": 1.2426505780436326e-06,
|
|
"loss": 0.2712,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 1.69,
|
|
"learning_rate": 1.1034290900525279e-06,
|
|
"loss": 0.2621,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 1.71,
|
|
"learning_rate": 9.720217003425648e-07,
|
|
"loss": 0.268,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 1.73,
|
|
"learning_rate": 8.485438275698154e-07,
|
|
"loss": 0.2641,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 1.75,
|
|
"learning_rate": 7.331039256816664e-07,
|
|
"loss": 0.2623,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 1.77,
|
|
"learning_rate": 6.258033886587911e-07,
|
|
"loss": 0.2714,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 1.79,
|
|
"learning_rate": 5.267364614580861e-07,
|
|
"loss": 0.2723,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 1.81,
|
|
"learning_rate": 4.359901572347758e-07,
|
|
"loss": 0.2611,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 3.5364418091641374e-07,
|
|
"loss": 0.2724,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 1.84,
|
|
"learning_rate": 2.7977085919589253e-07,
|
|
"loss": 0.2738,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 1.86,
|
|
"learning_rate": 2.1443507700495968e-07,
|
|
"loss": 0.2623,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 1.88,
|
|
"learning_rate": 1.5769422052403172e-07,
|
|
"loss": 0.2695,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 1.9,
|
|
"learning_rate": 1.0959812677835968e-07,
|
|
"loss": 0.2661,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 1.92,
|
|
"learning_rate": 7.018903986483083e-08,
|
|
"loss": 0.2712,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 1.94,
|
|
"learning_rate": 3.950157384783104e-08,
|
|
"loss": 0.2706,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 1.96,
|
|
"learning_rate": 1.7562682356786488e-08,
|
|
"loss": 0.276,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 1.97,
|
|
"learning_rate": 4.39163491205652e-09,
|
|
"loss": 0.2767,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.2624,
|
|
"step": 214
|
|
}
|
|
],
|
|
"max_steps": 214,
|
|
"num_train_epochs": 2,
|
|
"total_flos": 1536113535614976.0,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|