2431 lines
48 KiB
JSON
2431 lines
48 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.8229775327133569,
|
|
"eval_steps": 500,
|
|
"global_step": 10000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 5.0453611334320685e-06,
|
|
"loss": 0.3087,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 6.229195710491767e-06,
|
|
"loss": 0.3064,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 6.903829450223392e-06,
|
|
"loss": 0.3032,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 7.377725845391017e-06,
|
|
"loss": 0.2897,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 7.743343231239583e-06,
|
|
"loss": 0.2752,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 8.041073861170494e-06,
|
|
"loss": 0.2695,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 8.292222957399574e-06,
|
|
"loss": 0.2657,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 8.509413541357755e-06,
|
|
"loss": 0.2567,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 8.700744577655557e-06,
|
|
"loss": 0.2824,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 8.871723942761204e-06,
|
|
"loss": 0.266,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.026267958246849e-06,
|
|
"loss": 0.2634,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 9.16726106663399e-06,
|
|
"loss": 0.2691,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.296889251455016e-06,
|
|
"loss": 0.256,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.416848797368692e-06,
|
|
"loss": 0.2534,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.528482449516371e-06,
|
|
"loss": 0.2719,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.632871309784314e-06,
|
|
"loss": 0.2646,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 9.73089868785391e-06,
|
|
"loss": 0.2508,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.823295589572114e-06,
|
|
"loss": 0.2521,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.910673836465484e-06,
|
|
"loss": 0.2381,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.993550644973805e-06,
|
|
"loss": 0.2728,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 9.976842105263158e-06,
|
|
"loss": 0.2246,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.950526315789475e-06,
|
|
"loss": 0.2373,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.92421052631579e-06,
|
|
"loss": 0.2511,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.897894736842107e-06,
|
|
"loss": 0.246,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.871578947368422e-06,
|
|
"loss": 0.2273,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 9.845263157894738e-06,
|
|
"loss": 0.2509,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.818947368421053e-06,
|
|
"loss": 0.2126,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.79263157894737e-06,
|
|
"loss": 0.2007,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.766315789473685e-06,
|
|
"loss": 0.248,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.74e-06,
|
|
"loss": 0.2357,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 9.713684210526317e-06,
|
|
"loss": 0.2349,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.687368421052632e-06,
|
|
"loss": 0.2388,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.661052631578948e-06,
|
|
"loss": 0.2527,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.634736842105265e-06,
|
|
"loss": 0.2283,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.60842105263158e-06,
|
|
"loss": 0.2347,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 9.582105263157897e-06,
|
|
"loss": 0.2423,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.555789473684211e-06,
|
|
"loss": 0.2306,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.529473684210528e-06,
|
|
"loss": 0.2452,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.503157894736843e-06,
|
|
"loss": 0.2157,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.476842105263158e-06,
|
|
"loss": 0.2229,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 9.452631578947368e-06,
|
|
"loss": 0.2176,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.426315789473685e-06,
|
|
"loss": 0.2204,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.4e-06,
|
|
"loss": 0.2197,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.373684210526316e-06,
|
|
"loss": 0.2192,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.347368421052633e-06,
|
|
"loss": 0.2084,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 9.321052631578948e-06,
|
|
"loss": 0.2202,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 9.294736842105265e-06,
|
|
"loss": 0.2066,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 9.26842105263158e-06,
|
|
"loss": 0.1891,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 9.242105263157896e-06,
|
|
"loss": 0.2137,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 9.215789473684211e-06,
|
|
"loss": 0.2114,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 9.189473684210526e-06,
|
|
"loss": 0.2036,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 9.163157894736843e-06,
|
|
"loss": 0.2245,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 9.136842105263158e-06,
|
|
"loss": 0.206,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 9.110526315789475e-06,
|
|
"loss": 0.2267,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 9.08421052631579e-06,
|
|
"loss": 0.2308,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 9.057894736842106e-06,
|
|
"loss": 0.1975,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 9.031578947368423e-06,
|
|
"loss": 0.206,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 9.005263157894738e-06,
|
|
"loss": 0.1878,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.978947368421055e-06,
|
|
"loss": 0.2008,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 8.95263157894737e-06,
|
|
"loss": 0.2034,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.926315789473685e-06,
|
|
"loss": 0.2075,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.900000000000001e-06,
|
|
"loss": 0.2086,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.873684210526316e-06,
|
|
"loss": 0.1993,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.847368421052633e-06,
|
|
"loss": 0.204,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 8.821052631578948e-06,
|
|
"loss": 0.1998,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.794736842105264e-06,
|
|
"loss": 0.222,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.76842105263158e-06,
|
|
"loss": 0.2151,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.742105263157894e-06,
|
|
"loss": 0.1854,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.715789473684211e-06,
|
|
"loss": 0.2148,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 8.689473684210526e-06,
|
|
"loss": 0.1965,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.663157894736843e-06,
|
|
"loss": 0.216,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.63684210526316e-06,
|
|
"loss": 0.2021,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.610526315789474e-06,
|
|
"loss": 0.1922,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.584210526315791e-06,
|
|
"loss": 0.2068,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 8.557894736842106e-06,
|
|
"loss": 0.2174,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.531578947368423e-06,
|
|
"loss": 0.1834,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.505263157894738e-06,
|
|
"loss": 0.1714,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.478947368421053e-06,
|
|
"loss": 0.2106,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.45263157894737e-06,
|
|
"loss": 0.2222,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 8.426315789473684e-06,
|
|
"loss": 0.1911,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.402105263157896e-06,
|
|
"loss": 0.1918,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.375789473684211e-06,
|
|
"loss": 0.1831,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.349473684210528e-06,
|
|
"loss": 0.1808,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.323157894736843e-06,
|
|
"loss": 0.1833,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 8.29684210526316e-06,
|
|
"loss": 0.1929,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.270526315789474e-06,
|
|
"loss": 0.1846,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.244210526315791e-06,
|
|
"loss": 0.1921,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.217894736842106e-06,
|
|
"loss": 0.1928,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 8.19157894736842e-06,
|
|
"loss": 0.1808,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.165263157894737e-06,
|
|
"loss": 0.191,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.138947368421052e-06,
|
|
"loss": 0.1832,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.112631578947369e-06,
|
|
"loss": 0.2098,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.086315789473684e-06,
|
|
"loss": 0.2125,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 8.06e-06,
|
|
"loss": 0.1671,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 8.033684210526317e-06,
|
|
"loss": 0.185,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 8.007368421052632e-06,
|
|
"loss": 0.1963,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 7.981052631578949e-06,
|
|
"loss": 0.1847,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 7.954736842105264e-06,
|
|
"loss": 0.1862,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 7.928421052631579e-06,
|
|
"loss": 0.1555,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.902105263157896e-06,
|
|
"loss": 0.1682,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.87578947368421e-06,
|
|
"loss": 0.1705,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.849473684210527e-06,
|
|
"loss": 0.1983,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.823157894736842e-06,
|
|
"loss": 0.1762,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 7.796842105263159e-06,
|
|
"loss": 0.1835,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.770526315789474e-06,
|
|
"loss": 0.1887,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.744210526315789e-06,
|
|
"loss": 0.1724,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.717894736842107e-06,
|
|
"loss": 0.1937,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.691578947368422e-06,
|
|
"loss": 0.1886,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 7.665263157894737e-06,
|
|
"loss": 0.2126,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.638947368421054e-06,
|
|
"loss": 0.2043,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.61263157894737e-06,
|
|
"loss": 0.1705,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.586315789473685e-06,
|
|
"loss": 0.1581,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.5600000000000005e-06,
|
|
"loss": 0.1699,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 7.533684210526316e-06,
|
|
"loss": 0.1801,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.507368421052632e-06,
|
|
"loss": 0.1732,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.481052631578948e-06,
|
|
"loss": 0.1846,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.454736842105264e-06,
|
|
"loss": 0.1832,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.4284210526315796e-06,
|
|
"loss": 0.1581,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 7.4021052631578945e-06,
|
|
"loss": 0.1744,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.37578947368421e-06,
|
|
"loss": 0.1718,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.351578947368422e-06,
|
|
"loss": 0.181,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.325263157894738e-06,
|
|
"loss": 0.1563,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 7.298947368421053e-06,
|
|
"loss": 0.1821,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.272631578947369e-06,
|
|
"loss": 0.1635,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.2463157894736845e-06,
|
|
"loss": 0.1922,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.22e-06,
|
|
"loss": 0.1771,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.193684210526316e-06,
|
|
"loss": 0.1721,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 7.167368421052632e-06,
|
|
"loss": 0.1518,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.141052631578948e-06,
|
|
"loss": 0.1621,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.1147368421052645e-06,
|
|
"loss": 0.1771,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.08842105263158e-06,
|
|
"loss": 0.1703,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.062105263157896e-06,
|
|
"loss": 0.1775,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 7.035789473684211e-06,
|
|
"loss": 0.1826,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 7.009473684210527e-06,
|
|
"loss": 0.156,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 6.983157894736843e-06,
|
|
"loss": 0.1743,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 6.9568421052631585e-06,
|
|
"loss": 0.1821,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 6.930526315789474e-06,
|
|
"loss": 0.1784,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 6.90421052631579e-06,
|
|
"loss": 0.1634,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 6.877894736842106e-06,
|
|
"loss": 0.1824,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 6.851578947368421e-06,
|
|
"loss": 0.1852,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 6.825263157894737e-06,
|
|
"loss": 0.1851,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 6.798947368421053e-06,
|
|
"loss": 0.1754,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 6.772631578947368e-06,
|
|
"loss": 0.1805,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 6.746315789473685e-06,
|
|
"loss": 0.1773,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 6.720000000000001e-06,
|
|
"loss": 0.167,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 6.693684210526317e-06,
|
|
"loss": 0.1657,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 6.6673684210526325e-06,
|
|
"loss": 0.2064,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 6.641052631578948e-06,
|
|
"loss": 0.1741,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.614736842105264e-06,
|
|
"loss": 0.1899,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.588421052631579e-06,
|
|
"loss": 0.1531,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.562105263157895e-06,
|
|
"loss": 0.1637,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.535789473684211e-06,
|
|
"loss": 0.1386,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 6.509473684210527e-06,
|
|
"loss": 0.1668,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.483157894736842e-06,
|
|
"loss": 0.166,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.456842105263158e-06,
|
|
"loss": 0.1653,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.430526315789474e-06,
|
|
"loss": 0.161,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 6.404210526315791e-06,
|
|
"loss": 0.172,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.3778947368421065e-06,
|
|
"loss": 0.1625,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.351578947368422e-06,
|
|
"loss": 0.1595,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.325263157894737e-06,
|
|
"loss": 0.1513,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.301052631578947e-06,
|
|
"loss": 0.1762,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 6.274736842105263e-06,
|
|
"loss": 0.1448,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.248421052631579e-06,
|
|
"loss": 0.1773,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.222105263157895e-06,
|
|
"loss": 0.1507,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.195789473684211e-06,
|
|
"loss": 0.1508,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.1694736842105265e-06,
|
|
"loss": 0.1587,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 6.143157894736843e-06,
|
|
"loss": 0.1707,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.116842105263159e-06,
|
|
"loss": 0.1648,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.090526315789475e-06,
|
|
"loss": 0.165,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.0642105263157906e-06,
|
|
"loss": 0.1892,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.0378947368421055e-06,
|
|
"loss": 0.1698,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 6.011578947368421e-06,
|
|
"loss": 0.1692,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 5.985263157894737e-06,
|
|
"loss": 0.1504,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 5.958947368421053e-06,
|
|
"loss": 0.1435,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 5.932631578947369e-06,
|
|
"loss": 0.1532,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 5.906315789473685e-06,
|
|
"loss": 0.1575,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 5.8800000000000005e-06,
|
|
"loss": 0.163,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 5.853684210526316e-06,
|
|
"loss": 0.1524,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 5.827368421052631e-06,
|
|
"loss": 0.157,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 5.801052631578949e-06,
|
|
"loss": 0.1593,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 5.774736842105264e-06,
|
|
"loss": 0.1696,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 5.7484210526315795e-06,
|
|
"loss": 0.1472,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 5.722105263157895e-06,
|
|
"loss": 0.1534,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 5.695789473684211e-06,
|
|
"loss": 0.1552,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 5.669473684210527e-06,
|
|
"loss": 0.1661,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 5.643157894736843e-06,
|
|
"loss": 0.1526,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 5.616842105263159e-06,
|
|
"loss": 0.1454,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 5.590526315789474e-06,
|
|
"loss": 0.1505,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 5.5642105263157894e-06,
|
|
"loss": 0.1593,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 5.537894736842105e-06,
|
|
"loss": 0.1653,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 5.511578947368421e-06,
|
|
"loss": 0.1298,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 5.485263157894737e-06,
|
|
"loss": 0.157,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 5.458947368421053e-06,
|
|
"loss": 0.1549,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 5.432631578947369e-06,
|
|
"loss": 0.1516,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 5.406315789473685e-06,
|
|
"loss": 0.1511,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 5.380000000000001e-06,
|
|
"loss": 0.136,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.353684210526317e-06,
|
|
"loss": 0.1558,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.327368421052632e-06,
|
|
"loss": 0.1546,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.301052631578948e-06,
|
|
"loss": 0.1524,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.2747368421052634e-06,
|
|
"loss": 0.1531,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 5.2505263157894735e-06,
|
|
"loss": 0.1541,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.224210526315789e-06,
|
|
"loss": 0.1515,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.197894736842106e-06,
|
|
"loss": 0.1483,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.171578947368422e-06,
|
|
"loss": 0.1495,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.145263157894738e-06,
|
|
"loss": 0.1593,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 5.118947368421053e-06,
|
|
"loss": 0.1507,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.092631578947369e-06,
|
|
"loss": 0.1458,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.066315789473685e-06,
|
|
"loss": 0.1426,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.04e-06,
|
|
"loss": 0.1422,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 5.013684210526316e-06,
|
|
"loss": 0.1586,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 4.987368421052632e-06,
|
|
"loss": 0.1416,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.9610526315789475e-06,
|
|
"loss": 0.1757,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.934736842105264e-06,
|
|
"loss": 0.1338,
|
|
"step": 5325
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.908421052631579e-06,
|
|
"loss": 0.1545,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.882105263157895e-06,
|
|
"loss": 0.1494,
|
|
"step": 5375
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 4.855789473684211e-06,
|
|
"loss": 0.1627,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.8294736842105266e-06,
|
|
"loss": 0.1554,
|
|
"step": 5425
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.803157894736842e-06,
|
|
"loss": 0.1487,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.776842105263158e-06,
|
|
"loss": 0.1472,
|
|
"step": 5475
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.750526315789474e-06,
|
|
"loss": 0.1345,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 4.72421052631579e-06,
|
|
"loss": 0.1513,
|
|
"step": 5525
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.697894736842106e-06,
|
|
"loss": 0.1376,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.6715789473684215e-06,
|
|
"loss": 0.1554,
|
|
"step": 5575
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.645263157894737e-06,
|
|
"loss": 0.1475,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.618947368421053e-06,
|
|
"loss": 0.1449,
|
|
"step": 5625
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 4.592631578947369e-06,
|
|
"loss": 0.128,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.566315789473685e-06,
|
|
"loss": 0.1386,
|
|
"step": 5675
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.540000000000001e-06,
|
|
"loss": 0.1701,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.513684210526316e-06,
|
|
"loss": 0.1492,
|
|
"step": 5725
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 4.487368421052632e-06,
|
|
"loss": 0.1476,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.461052631578948e-06,
|
|
"loss": 0.146,
|
|
"step": 5775
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.434736842105263e-06,
|
|
"loss": 0.1399,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.408421052631579e-06,
|
|
"loss": 0.1462,
|
|
"step": 5825
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.3821052631578955e-06,
|
|
"loss": 0.1369,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 4.355789473684211e-06,
|
|
"loss": 0.1576,
|
|
"step": 5875
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.329473684210527e-06,
|
|
"loss": 0.1639,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.303157894736842e-06,
|
|
"loss": 0.1439,
|
|
"step": 5925
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.276842105263158e-06,
|
|
"loss": 0.1548,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.250526315789474e-06,
|
|
"loss": 0.1377,
|
|
"step": 5975
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 4.22421052631579e-06,
|
|
"loss": 0.1455,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.2000000000000004e-06,
|
|
"loss": 0.1581,
|
|
"step": 6025
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.173684210526316e-06,
|
|
"loss": 0.1463,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.147368421052632e-06,
|
|
"loss": 0.1524,
|
|
"step": 6075
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.121052631578948e-06,
|
|
"loss": 0.1262,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 4.094736842105264e-06,
|
|
"loss": 0.1492,
|
|
"step": 6125
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.0684210526315795e-06,
|
|
"loss": 0.1378,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.042105263157895e-06,
|
|
"loss": 0.1508,
|
|
"step": 6175
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 4.01578947368421e-06,
|
|
"loss": 0.1329,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 3.989473684210526e-06,
|
|
"loss": 0.1342,
|
|
"step": 6225
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 3.963157894736843e-06,
|
|
"loss": 0.1336,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 3.936842105263159e-06,
|
|
"loss": 0.1259,
|
|
"step": 6275
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 3.9105263157894744e-06,
|
|
"loss": 0.1402,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 3.884210526315789e-06,
|
|
"loss": 0.1361,
|
|
"step": 6325
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 3.857894736842105e-06,
|
|
"loss": 0.149,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 3.831578947368421e-06,
|
|
"loss": 0.1471,
|
|
"step": 6375
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 3.805263157894737e-06,
|
|
"loss": 0.1264,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 3.778947368421053e-06,
|
|
"loss": 0.1329,
|
|
"step": 6425
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 3.752631578947369e-06,
|
|
"loss": 0.1643,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 3.7263157894736848e-06,
|
|
"loss": 0.1489,
|
|
"step": 6475
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 3.7e-06,
|
|
"loss": 0.1234,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 3.673684210526316e-06,
|
|
"loss": 0.1318,
|
|
"step": 6525
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 3.6473684210526318e-06,
|
|
"loss": 0.1481,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 3.621052631578948e-06,
|
|
"loss": 0.1454,
|
|
"step": 6575
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 3.5947368421052634e-06,
|
|
"loss": 0.1493,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 3.5684210526315792e-06,
|
|
"loss": 0.1371,
|
|
"step": 6625
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 3.542105263157895e-06,
|
|
"loss": 0.1435,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 3.515789473684211e-06,
|
|
"loss": 0.1475,
|
|
"step": 6675
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 3.4894736842105263e-06,
|
|
"loss": 0.1509,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 3.463157894736842e-06,
|
|
"loss": 0.1512,
|
|
"step": 6725
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 3.4368421052631583e-06,
|
|
"loss": 0.1376,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 3.410526315789474e-06,
|
|
"loss": 0.1333,
|
|
"step": 6775
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 3.38421052631579e-06,
|
|
"loss": 0.1355,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 3.3578947368421054e-06,
|
|
"loss": 0.1577,
|
|
"step": 6825
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 3.331578947368421e-06,
|
|
"loss": 0.1315,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 3.305263157894737e-06,
|
|
"loss": 0.1328,
|
|
"step": 6875
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 3.278947368421053e-06,
|
|
"loss": 0.1445,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 3.252631578947369e-06,
|
|
"loss": 0.1373,
|
|
"step": 6925
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 3.2263157894736845e-06,
|
|
"loss": 0.1347,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 3.2000000000000003e-06,
|
|
"loss": 0.1323,
|
|
"step": 6975
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 3.173684210526316e-06,
|
|
"loss": 0.1406,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 3.1494736842105266e-06,
|
|
"loss": 0.1225,
|
|
"step": 7025
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 3.1231578947368424e-06,
|
|
"loss": 0.1459,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 3.096842105263158e-06,
|
|
"loss": 0.1315,
|
|
"step": 7075
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 3.0705263157894736e-06,
|
|
"loss": 0.1233,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 3.0442105263157894e-06,
|
|
"loss": 0.1328,
|
|
"step": 7125
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 3.0178947368421057e-06,
|
|
"loss": 0.1374,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 2.9915789473684215e-06,
|
|
"loss": 0.1323,
|
|
"step": 7175
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 2.9652631578947373e-06,
|
|
"loss": 0.1576,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 2.9389473684210527e-06,
|
|
"loss": 0.1365,
|
|
"step": 7225
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 2.9126315789473685e-06,
|
|
"loss": 0.1519,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 2.8863157894736843e-06,
|
|
"loss": 0.1446,
|
|
"step": 7275
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 2.86e-06,
|
|
"loss": 0.1529,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 2.8336842105263164e-06,
|
|
"loss": 0.1426,
|
|
"step": 7325
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 2.8073684210526318e-06,
|
|
"loss": 0.1352,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 2.7810526315789476e-06,
|
|
"loss": 0.1391,
|
|
"step": 7375
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 2.7547368421052634e-06,
|
|
"loss": 0.1369,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 2.7284210526315792e-06,
|
|
"loss": 0.1361,
|
|
"step": 7425
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 2.7021052631578946e-06,
|
|
"loss": 0.1319,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 2.6757894736842105e-06,
|
|
"loss": 0.1236,
|
|
"step": 7475
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 2.6494736842105267e-06,
|
|
"loss": 0.1349,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 2.6231578947368425e-06,
|
|
"loss": 0.1438,
|
|
"step": 7525
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 2.5968421052631583e-06,
|
|
"loss": 0.1203,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 2.5705263157894737e-06,
|
|
"loss": 0.1335,
|
|
"step": 7575
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 2.5442105263157895e-06,
|
|
"loss": 0.1609,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 2.5178947368421054e-06,
|
|
"loss": 0.1199,
|
|
"step": 7625
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 2.491578947368421e-06,
|
|
"loss": 0.135,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 2.465263157894737e-06,
|
|
"loss": 0.1355,
|
|
"step": 7675
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 2.438947368421053e-06,
|
|
"loss": 0.1429,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 2.4126315789473686e-06,
|
|
"loss": 0.1199,
|
|
"step": 7725
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 2.3863157894736845e-06,
|
|
"loss": 0.1185,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 2.3600000000000003e-06,
|
|
"loss": 0.158,
|
|
"step": 7775
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 2.333684210526316e-06,
|
|
"loss": 0.1367,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 2.307368421052632e-06,
|
|
"loss": 0.1454,
|
|
"step": 7825
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 2.2810526315789473e-06,
|
|
"loss": 0.1239,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 2.2547368421052635e-06,
|
|
"loss": 0.116,
|
|
"step": 7875
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 2.228421052631579e-06,
|
|
"loss": 0.1256,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 2.2021052631578948e-06,
|
|
"loss": 0.1233,
|
|
"step": 7925
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 2.175789473684211e-06,
|
|
"loss": 0.1435,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 2.1494736842105264e-06,
|
|
"loss": 0.126,
|
|
"step": 7975
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 2.1231578947368422e-06,
|
|
"loss": 0.1343,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 2.098947368421053e-06,
|
|
"loss": 0.1192,
|
|
"step": 8025
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 2.0726315789473685e-06,
|
|
"loss": 0.1582,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 2.0463157894736843e-06,
|
|
"loss": 0.1336,
|
|
"step": 8075
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 2.02e-06,
|
|
"loss": 0.133,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.993684210526316e-06,
|
|
"loss": 0.1092,
|
|
"step": 8125
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.9673684210526318e-06,
|
|
"loss": 0.1518,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.9410526315789476e-06,
|
|
"loss": 0.1223,
|
|
"step": 8175
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.9147368421052634e-06,
|
|
"loss": 0.138,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.888421052631579e-06,
|
|
"loss": 0.1348,
|
|
"step": 8225
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8621052631578948e-06,
|
|
"loss": 0.1182,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8357894736842109e-06,
|
|
"loss": 0.1305,
|
|
"step": 8275
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.8094736842105265e-06,
|
|
"loss": 0.1389,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.7831578947368423e-06,
|
|
"loss": 0.1275,
|
|
"step": 8325
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.756842105263158e-06,
|
|
"loss": 0.1443,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.730526315789474e-06,
|
|
"loss": 0.1134,
|
|
"step": 8375
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.7042105263157895e-06,
|
|
"loss": 0.1122,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.6778947368421054e-06,
|
|
"loss": 0.1451,
|
|
"step": 8425
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.6515789473684212e-06,
|
|
"loss": 0.1204,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.625263157894737e-06,
|
|
"loss": 0.121,
|
|
"step": 8475
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.5989473684210526e-06,
|
|
"loss": 0.1289,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.5726315789473686e-06,
|
|
"loss": 0.1388,
|
|
"step": 8525
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.5463157894736845e-06,
|
|
"loss": 0.1316,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.52e-06,
|
|
"loss": 0.1396,
|
|
"step": 8575
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.4936842105263159e-06,
|
|
"loss": 0.1206,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.4673684210526317e-06,
|
|
"loss": 0.1362,
|
|
"step": 8625
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.4410526315789475e-06,
|
|
"loss": 0.1213,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.4147368421052631e-06,
|
|
"loss": 0.1398,
|
|
"step": 8675
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.3884210526315792e-06,
|
|
"loss": 0.1446,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.362105263157895e-06,
|
|
"loss": 0.1098,
|
|
"step": 8725
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.3357894736842106e-06,
|
|
"loss": 0.1352,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.3094736842105262e-06,
|
|
"loss": 0.1295,
|
|
"step": 8775
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.2831578947368422e-06,
|
|
"loss": 0.1309,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.256842105263158e-06,
|
|
"loss": 0.1492,
|
|
"step": 8825
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.2305263157894739e-06,
|
|
"loss": 0.1489,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.2042105263157895e-06,
|
|
"loss": 0.1187,
|
|
"step": 8875
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.1778947368421053e-06,
|
|
"loss": 0.144,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.1515789473684213e-06,
|
|
"loss": 0.1201,
|
|
"step": 8925
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.125263157894737e-06,
|
|
"loss": 0.11,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.0989473684210527e-06,
|
|
"loss": 0.1476,
|
|
"step": 8975
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.0726315789473685e-06,
|
|
"loss": 0.1462,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.048421052631579e-06,
|
|
"loss": 0.1503,
|
|
"step": 9025
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.0221052631578948e-06,
|
|
"loss": 0.1338,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 9.957894736842107e-07,
|
|
"loss": 0.1339,
|
|
"step": 9075
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 9.694736842105265e-07,
|
|
"loss": 0.1322,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 9.431578947368422e-07,
|
|
"loss": 0.1316,
|
|
"step": 9125
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 9.168421052631579e-07,
|
|
"loss": 0.1234,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 8.905263157894737e-07,
|
|
"loss": 0.1297,
|
|
"step": 9175
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 8.642105263157896e-07,
|
|
"loss": 0.1218,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 8.378947368421054e-07,
|
|
"loss": 0.124,
|
|
"step": 9225
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 8.115789473684212e-07,
|
|
"loss": 0.1271,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 7.852631578947369e-07,
|
|
"loss": 0.1196,
|
|
"step": 9275
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 7.589473684210527e-07,
|
|
"loss": 0.1296,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 7.326315789473684e-07,
|
|
"loss": 0.1182,
|
|
"step": 9325
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 7.063157894736842e-07,
|
|
"loss": 0.1213,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 6.800000000000001e-07,
|
|
"loss": 0.1302,
|
|
"step": 9375
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 6.536842105263158e-07,
|
|
"loss": 0.133,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 6.273684210526317e-07,
|
|
"loss": 0.1202,
|
|
"step": 9425
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 6.010526315789474e-07,
|
|
"loss": 0.1293,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 5.747368421052632e-07,
|
|
"loss": 0.1416,
|
|
"step": 9475
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 5.484210526315789e-07,
|
|
"loss": 0.1398,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 5.221052631578948e-07,
|
|
"loss": 0.1314,
|
|
"step": 9525
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 4.957894736842106e-07,
|
|
"loss": 0.1301,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 4.694736842105264e-07,
|
|
"loss": 0.1242,
|
|
"step": 9575
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 4.4315789473684216e-07,
|
|
"loss": 0.1194,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 4.168421052631579e-07,
|
|
"loss": 0.1313,
|
|
"step": 9625
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 3.905263157894737e-07,
|
|
"loss": 0.1379,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 3.6421052631578945e-07,
|
|
"loss": 0.1469,
|
|
"step": 9675
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 3.378947368421053e-07,
|
|
"loss": 0.1472,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 3.115789473684211e-07,
|
|
"loss": 0.1312,
|
|
"step": 9725
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.8526315789473686e-07,
|
|
"loss": 0.1152,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 2.589473684210526e-07,
|
|
"loss": 0.1024,
|
|
"step": 9775
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 2.3263157894736844e-07,
|
|
"loss": 0.1194,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 2.0631578947368423e-07,
|
|
"loss": 0.1304,
|
|
"step": 9825
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.8e-07,
|
|
"loss": 0.1289,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.536842105263158e-07,
|
|
"loss": 0.1305,
|
|
"step": 9875
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.2736842105263158e-07,
|
|
"loss": 0.1182,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.0105263157894737e-07,
|
|
"loss": 0.11,
|
|
"step": 9925
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 7.473684210526317e-08,
|
|
"loss": 0.1196,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 4.842105263157895e-08,
|
|
"loss": 0.1286,
|
|
"step": 9975
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 2.2105263157894736e-08,
|
|
"loss": 0.1367,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"step": 10000,
|
|
"total_flos": 5.4359970325966356e+20,
|
|
"train_loss": 0.16558935852050782,
|
|
"train_runtime": 67809.1634,
|
|
"train_samples_per_second": 2.36,
|
|
"train_steps_per_second": 0.147
|
|
}
|
|
],
|
|
"logging_steps": 25,
|
|
"max_steps": 10000,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 2000,
|
|
"total_flos": 5.4359970325966356e+20,
|
|
"train_batch_size": 16,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|