{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1535, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03262642740619902, "grad_norm": 0.0, "learning_rate": 3.6000000000000003e-06, "loss": 0.7799, "step": 10 }, { "epoch": 0.06525285481239804, "grad_norm": 0.0, "learning_rate": 7.600000000000001e-06, "loss": 0.7788, "step": 20 }, { "epoch": 0.09787928221859707, "grad_norm": 0.0, "learning_rate": 1.16e-05, "loss": 0.7558, "step": 30 }, { "epoch": 0.13050570962479607, "grad_norm": 0.0, "learning_rate": 1.5600000000000003e-05, "loss": 0.7929, "step": 40 }, { "epoch": 0.1631321370309951, "grad_norm": 0.0, "learning_rate": 1.9600000000000002e-05, "loss": 0.8025, "step": 50 }, { "epoch": 0.19575856443719414, "grad_norm": 0.0, "learning_rate": 1.999818745523526e-05, "loss": 0.8121, "step": 60 }, { "epoch": 0.22838499184339314, "grad_norm": 0.0, "learning_rate": 1.9991922711960104e-05, "loss": 0.768, "step": 70 }, { "epoch": 0.26101141924959215, "grad_norm": 0.0, "learning_rate": 1.998118619612634e-05, "loss": 0.7567, "step": 80 }, { "epoch": 0.2936378466557912, "grad_norm": 0.0, "learning_rate": 1.996598271274081e-05, "loss": 0.7997, "step": 90 }, { "epoch": 0.3262642740619902, "grad_norm": 0.0, "learning_rate": 1.9946319065951382e-05, "loss": 0.7646, "step": 100 }, { "epoch": 0.35889070146818924, "grad_norm": 0.0, "learning_rate": 1.9922204056001896e-05, "loss": 0.793, "step": 110 }, { "epoch": 0.3915171288743883, "grad_norm": 0.0, "learning_rate": 1.9893648475293646e-05, "loss": 0.7884, "step": 120 }, { "epoch": 0.42414355628058725, "grad_norm": 0.0, "learning_rate": 1.9860665103555418e-05, "loss": 0.7747, "step": 130 }, { "epoch": 0.4567699836867863, "grad_norm": 0.0, "learning_rate": 1.982326870212402e-05, "loss": 0.7603, "step": 140 }, { "epoch": 0.4893964110929853, "grad_norm": 0.0, "learning_rate": 1.9781476007338058e-05, "loss": 0.7703, "step": 150 }, { "epoch": 0.5220228384991843, "grad_norm": 0.0, "learning_rate": 1.973530572304773e-05, "loss": 0.7675, "step": 160 }, { "epoch": 0.5546492659053833, "grad_norm": 0.0, "learning_rate": 1.9684778512244172e-05, "loss": 0.8043, "step": 170 }, { "epoch": 0.5872756933115824, "grad_norm": 0.0, "learning_rate": 1.9629916987811924e-05, "loss": 0.8024, "step": 180 }, { "epoch": 0.6199021207177814, "grad_norm": 0.0, "learning_rate": 1.957074570240883e-05, "loss": 0.7716, "step": 190 }, { "epoch": 0.6525285481239804, "grad_norm": 0.0, "learning_rate": 1.9507291137477744e-05, "loss": 0.7651, "step": 200 }, { "epoch": 0.6851549755301795, "grad_norm": 0.0, "learning_rate": 1.943958169139507e-05, "loss": 0.7819, "step": 210 }, { "epoch": 0.7177814029363785, "grad_norm": 0.0, "learning_rate": 1.9367647666761384e-05, "loss": 0.7792, "step": 220 }, { "epoch": 0.7504078303425775, "grad_norm": 0.0, "learning_rate": 1.929152125683986e-05, "loss": 0.7937, "step": 230 }, { "epoch": 0.7830342577487766, "grad_norm": 0.0, "learning_rate": 1.92112365311485e-05, "loss": 0.7721, "step": 240 }, { "epoch": 0.8156606851549756, "grad_norm": 0.0, "learning_rate": 1.9126829420212764e-05, "loss": 0.772, "step": 250 }, { "epoch": 0.8482871125611745, "grad_norm": 0.0, "learning_rate": 1.9038337699485207e-05, "loss": 0.7611, "step": 260 }, { "epoch": 0.8809135399673735, "grad_norm": 0.0, "learning_rate": 1.894580097243954e-05, "loss": 0.7829, "step": 270 }, { "epoch": 0.9135399673735726, "grad_norm": 0.0, "learning_rate": 1.884926065284652e-05, "loss": 0.7815, "step": 280 }, { "epoch": 0.9461663947797716, "grad_norm": 0.0, "learning_rate": 1.87487599462397e-05, "loss": 0.7742, "step": 290 }, { "epoch": 0.9787928221859706, "grad_norm": 0.0, "learning_rate": 1.864434383057927e-05, "loss": 0.7561, "step": 300 }, { "epoch": 1.0097879282218598, "grad_norm": 0.0, "learning_rate": 1.853605903612267e-05, "loss": 0.7452, "step": 310 }, { "epoch": 1.0424143556280587, "grad_norm": 0.0, "learning_rate": 1.8423954024510995e-05, "loss": 0.7773, "step": 320 }, { "epoch": 1.0750407830342577, "grad_norm": 0.0, "learning_rate": 1.8308078967080547e-05, "loss": 0.8153, "step": 330 }, { "epoch": 1.1076672104404568, "grad_norm": 0.0, "learning_rate": 1.8188485722409196e-05, "loss": 0.793, "step": 340 }, { "epoch": 1.1402936378466557, "grad_norm": 0.0, "learning_rate": 1.8065227813107667e-05, "loss": 0.7822, "step": 350 }, { "epoch": 1.1729200652528549, "grad_norm": 0.0, "learning_rate": 1.7938360401866096e-05, "loss": 0.7703, "step": 360 }, { "epoch": 1.2055464926590538, "grad_norm": 0.0, "learning_rate": 1.7807940266766595e-05, "loss": 0.78, "step": 370 }, { "epoch": 1.238172920065253, "grad_norm": 0.0, "learning_rate": 1.767402577587285e-05, "loss": 0.7718, "step": 380 }, { "epoch": 1.2707993474714518, "grad_norm": 0.0, "learning_rate": 1.7536676861108167e-05, "loss": 0.7895, "step": 390 }, { "epoch": 1.3034257748776508, "grad_norm": 0.0, "learning_rate": 1.7395954991433588e-05, "loss": 0.7638, "step": 400 }, { "epoch": 1.33605220228385, "grad_norm": 0.0, "learning_rate": 1.7251923145338175e-05, "loss": 0.7874, "step": 410 }, { "epoch": 1.368678629690049, "grad_norm": 0.0, "learning_rate": 1.710464578265369e-05, "loss": 0.7947, "step": 420 }, { "epoch": 1.401305057096248, "grad_norm": 0.0, "learning_rate": 1.6954188815706306e-05, "loss": 0.7811, "step": 430 }, { "epoch": 1.433931484502447, "grad_norm": 0.0, "learning_rate": 1.680061957981831e-05, "loss": 0.736, "step": 440 }, { "epoch": 1.466557911908646, "grad_norm": 0.0, "learning_rate": 1.6644006803172926e-05, "loss": 0.765, "step": 450 }, { "epoch": 1.499184339314845, "grad_norm": 0.0, "learning_rate": 1.6484420576055787e-05, "loss": 0.7608, "step": 460 }, { "epoch": 1.531810766721044, "grad_norm": 0.0, "learning_rate": 1.6321932319486822e-05, "loss": 0.779, "step": 470 }, { "epoch": 1.564437194127243, "grad_norm": 0.0, "learning_rate": 1.6156614753256583e-05, "loss": 0.7824, "step": 480 }, { "epoch": 1.597063621533442, "grad_norm": 0.0, "learning_rate": 1.5988541863381323e-05, "loss": 0.7859, "step": 490 }, { "epoch": 1.629690048939641, "grad_norm": 0.0, "learning_rate": 1.581778886899138e-05, "loss": 0.784, "step": 500 }, { "epoch": 1.6623164763458402, "grad_norm": 0.0, "learning_rate": 1.5644432188667695e-05, "loss": 0.7578, "step": 510 }, { "epoch": 1.6949429037520392, "grad_norm": 0.0, "learning_rate": 1.546854940624156e-05, "loss": 0.7779, "step": 520 }, { "epoch": 1.727569331158238, "grad_norm": 0.0, "learning_rate": 1.5290219236072833e-05, "loss": 0.7667, "step": 530 }, { "epoch": 1.7601957585644372, "grad_norm": 0.0, "learning_rate": 1.5109521487822208e-05, "loss": 0.7765, "step": 540 }, { "epoch": 1.7928221859706364, "grad_norm": 0.0, "learning_rate": 1.4926537030733301e-05, "loss": 0.8005, "step": 550 }, { "epoch": 1.8254486133768353, "grad_norm": 0.0, "learning_rate": 1.474134775744054e-05, "loss": 0.7501, "step": 560 }, { "epoch": 1.8580750407830342, "grad_norm": 0.0, "learning_rate": 1.4554036547319033e-05, "loss": 0.7968, "step": 570 }, { "epoch": 1.8907014681892331, "grad_norm": 0.0, "learning_rate": 1.4364687229392823e-05, "loss": 0.7676, "step": 580 }, { "epoch": 1.9233278955954323, "grad_norm": 0.0, "learning_rate": 1.417338454481818e-05, "loss": 0.8098, "step": 590 }, { "epoch": 1.9559543230016314, "grad_norm": 0.0, "learning_rate": 1.3980214108958626e-05, "loss": 0.7602, "step": 600 }, { "epoch": 1.9885807504078303, "grad_norm": 0.0, "learning_rate": 1.3785262373068742e-05, "loss": 0.78, "step": 610 }, { "epoch": 2.0195758564437196, "grad_norm": 0.0, "learning_rate": 1.3588616585603908e-05, "loss": 0.79, "step": 620 }, { "epoch": 2.0522022838499185, "grad_norm": 0.0, "learning_rate": 1.3390364753173206e-05, "loss": 0.7759, "step": 630 }, { "epoch": 2.0848287112561175, "grad_norm": 0.0, "learning_rate": 1.319059560115308e-05, "loss": 0.7811, "step": 640 }, { "epoch": 2.1174551386623164, "grad_norm": 0.0, "learning_rate": 1.2989398533979271e-05, "loss": 0.793, "step": 650 }, { "epoch": 2.1500815660685153, "grad_norm": 0.0, "learning_rate": 1.278686359513488e-05, "loss": 0.7435, "step": 660 }, { "epoch": 2.1827079934747147, "grad_norm": 0.0, "learning_rate": 1.2583081426852412e-05, "loss": 0.7775, "step": 670 }, { "epoch": 2.2153344208809136, "grad_norm": 0.0, "learning_rate": 1.237814322954788e-05, "loss": 0.7885, "step": 680 }, { "epoch": 2.2479608482871125, "grad_norm": 0.0, "learning_rate": 1.217214072100508e-05, "loss": 0.7745, "step": 690 }, { "epoch": 2.2805872756933114, "grad_norm": 0.0, "learning_rate": 1.1965166095328302e-05, "loss": 0.7463, "step": 700 }, { "epoch": 2.3132137030995104, "grad_norm": 0.0, "learning_rate": 1.1757311981681943e-05, "loss": 0.7962, "step": 710 }, { "epoch": 2.3458401305057097, "grad_norm": 0.0, "learning_rate": 1.1548671402835325e-05, "loss": 0.7699, "step": 720 }, { "epoch": 2.3784665579119086, "grad_norm": 0.0, "learning_rate": 1.1339337733531435e-05, "loss": 0.8087, "step": 730 }, { "epoch": 2.4110929853181076, "grad_norm": 0.0, "learning_rate": 1.1129404658698082e-05, "loss": 0.7399, "step": 740 }, { "epoch": 2.443719412724307, "grad_norm": 0.0, "learning_rate": 1.0918966131520276e-05, "loss": 0.7841, "step": 750 }, { "epoch": 2.476345840130506, "grad_norm": 0.0, "learning_rate": 1.0708116331392542e-05, "loss": 0.7998, "step": 760 }, { "epoch": 2.5089722675367048, "grad_norm": 0.0, "learning_rate": 1.0496949621769976e-05, "loss": 0.7869, "step": 770 }, { "epoch": 2.5415986949429037, "grad_norm": 0.0, "learning_rate": 1.0285560507936962e-05, "loss": 0.789, "step": 780 }, { "epoch": 2.5742251223491026, "grad_norm": 0.0, "learning_rate": 1.007404359471238e-05, "loss": 0.7694, "step": 790 }, { "epoch": 2.6068515497553015, "grad_norm": 0.0, "learning_rate": 9.862493544110282e-06, "loss": 0.7746, "step": 800 }, { "epoch": 2.639477977161501, "grad_norm": 0.0, "learning_rate": 9.651005032974994e-06, "loss": 0.7776, "step": 810 }, { "epoch": 2.6721044045677, "grad_norm": 0.0, "learning_rate": 9.439672710609532e-06, "loss": 0.8017, "step": 820 }, { "epoch": 2.7047308319738987, "grad_norm": 0.0, "learning_rate": 9.228591156416405e-06, "loss": 0.7494, "step": 830 }, { "epoch": 2.737357259380098, "grad_norm": 0.0, "learning_rate": 9.017854837569629e-06, "loss": 0.7635, "step": 840 }, { "epoch": 2.769983686786297, "grad_norm": 0.0, "learning_rate": 8.807558066737042e-06, "loss": 0.7947, "step": 850 }, { "epoch": 2.802610114192496, "grad_norm": 0.0, "learning_rate": 8.597794959871694e-06, "loss": 0.7897, "step": 860 }, { "epoch": 2.835236541598695, "grad_norm": 0.0, "learning_rate": 8.388659394091362e-06, "loss": 0.7715, "step": 870 }, { "epoch": 2.867862969004894, "grad_norm": 0.0, "learning_rate": 8.180244965664845e-06, "loss": 0.7685, "step": 880 }, { "epoch": 2.9004893964110927, "grad_norm": 0.0, "learning_rate": 7.97264494812405e-06, "loss": 0.7456, "step": 890 }, { "epoch": 2.933115823817292, "grad_norm": 0.0, "learning_rate": 7.765952250520459e-06, "loss": 0.8071, "step": 900 }, { "epoch": 2.965742251223491, "grad_norm": 0.0, "learning_rate": 7.560259375844719e-06, "loss": 0.7667, "step": 910 }, { "epoch": 2.99836867862969, "grad_norm": 0.0, "learning_rate": 7.355658379627981e-06, "loss": 0.764, "step": 920 }, { "epoch": 3.029363784665579, "grad_norm": 0.0, "learning_rate": 7.1522408287434774e-06, "loss": 0.8021, "step": 930 }, { "epoch": 3.061990212071778, "grad_norm": 0.0, "learning_rate": 6.950097760426814e-06, "loss": 0.7764, "step": 940 }, { "epoch": 3.094616639477977, "grad_norm": 0.0, "learning_rate": 6.74931964153325e-06, "loss": 0.8317, "step": 950 }, { "epoch": 3.1272430668841764, "grad_norm": 0.0, "learning_rate": 6.549996328050296e-06, "loss": 0.789, "step": 960 }, { "epoch": 3.1598694942903753, "grad_norm": 0.0, "learning_rate": 6.352217024883678e-06, "loss": 0.7928, "step": 970 }, { "epoch": 3.1924959216965743, "grad_norm": 0.0, "learning_rate": 6.1560702459346845e-06, "loss": 0.7768, "step": 980 }, { "epoch": 3.225122349102773, "grad_norm": 0.0, "learning_rate": 5.961643774486754e-06, "loss": 0.7542, "step": 990 }, { "epoch": 3.257748776508972, "grad_norm": 0.0, "learning_rate": 5.769024623919064e-06, "loss": 0.7807, "step": 1000 }, { "epoch": 3.2903752039151715, "grad_norm": 0.0, "learning_rate": 5.57829899876469e-06, "loss": 0.7849, "step": 1010 }, { "epoch": 3.3230016313213704, "grad_norm": 0.0, "learning_rate": 5.38955225613069e-06, "loss": 0.78, "step": 1020 }, { "epoch": 3.3556280587275693, "grad_norm": 0.0, "learning_rate": 5.202868867497542e-06, "loss": 0.777, "step": 1030 }, { "epoch": 3.3882544861337682, "grad_norm": 0.0, "learning_rate": 5.01833238091485e-06, "loss": 0.7735, "step": 1040 }, { "epoch": 3.4208809135399676, "grad_norm": 0.0, "learning_rate": 4.836025383610382e-06, "loss": 0.7732, "step": 1050 }, { "epoch": 3.4535073409461665, "grad_norm": 0.0, "learning_rate": 4.656029465029057e-06, "loss": 0.7516, "step": 1060 }, { "epoch": 3.4861337683523654, "grad_norm": 0.0, "learning_rate": 4.478425180318523e-06, "loss": 0.7534, "step": 1070 }, { "epoch": 3.5187601957585644, "grad_norm": 0.0, "learning_rate": 4.3032920142776125e-06, "loss": 0.7672, "step": 1080 }, { "epoch": 3.5513866231647633, "grad_norm": 0.0, "learning_rate": 4.1307083457838004e-06, "loss": 0.7406, "step": 1090 }, { "epoch": 3.5840130505709626, "grad_norm": 0.0, "learning_rate": 3.960751412715629e-06, "loss": 0.82, "step": 1100 }, { "epoch": 3.6166394779771616, "grad_norm": 0.0, "learning_rate": 3.7934972773857637e-06, "loss": 0.7934, "step": 1110 }, { "epoch": 3.6492659053833605, "grad_norm": 0.0, "learning_rate": 3.6290207925001585e-06, "loss": 0.7772, "step": 1120 }, { "epoch": 3.6818923327895594, "grad_norm": 0.0, "learning_rate": 3.4673955676585734e-06, "loss": 0.7678, "step": 1130 }, { "epoch": 3.7145187601957588, "grad_norm": 0.0, "learning_rate": 3.308693936411421e-06, "loss": 0.7717, "step": 1140 }, { "epoch": 3.7471451876019577, "grad_norm": 0.0, "learning_rate": 3.152986923887703e-06, "loss": 0.7977, "step": 1150 }, { "epoch": 3.7797716150081566, "grad_norm": 0.0, "learning_rate": 3.000344215008524e-06, "loss": 0.76, "step": 1160 }, { "epoch": 3.8123980424143555, "grad_norm": 0.0, "learning_rate": 2.8508341233003656e-06, "loss": 0.7893, "step": 1170 }, { "epoch": 3.8450244698205545, "grad_norm": 0.0, "learning_rate": 2.7045235603221533e-06, "loss": 0.7612, "step": 1180 }, { "epoch": 3.877650897226754, "grad_norm": 0.0, "learning_rate": 2.561478005719743e-06, "loss": 0.7541, "step": 1190 }, { "epoch": 3.9102773246329527, "grad_norm": 0.0, "learning_rate": 2.421761477921232e-06, "loss": 0.7643, "step": 1200 }, { "epoch": 3.9429037520391517, "grad_norm": 0.0, "learning_rate": 2.2854365054862383e-06, "loss": 0.7838, "step": 1210 }, { "epoch": 3.9755301794453506, "grad_norm": 0.0, "learning_rate": 2.152564099121944e-06, "loss": 0.788, "step": 1220 }, { "epoch": 4.006525285481239, "grad_norm": 0.0, "learning_rate": 2.0232037243784475e-06, "loss": 0.7716, "step": 1230 }, { "epoch": 4.039151712887439, "grad_norm": 0.0, "learning_rate": 1.8974132750356156e-06, "loss": 0.792, "step": 1240 }, { "epoch": 4.071778140293638, "grad_norm": 0.0, "learning_rate": 1.7752490471933769e-06, "loss": 0.768, "step": 1250 }, { "epoch": 4.104404567699837, "grad_norm": 0.0, "learning_rate": 1.6567657140770477e-06, "loss": 0.7654, "step": 1260 }, { "epoch": 4.137030995106036, "grad_norm": 0.0, "learning_rate": 1.542016301568926e-06, "loss": 0.7698, "step": 1270 }, { "epoch": 4.169657422512235, "grad_norm": 0.0, "learning_rate": 1.4310521644771657e-06, "loss": 0.745, "step": 1280 }, { "epoch": 4.202283849918434, "grad_norm": 0.0, "learning_rate": 1.3239229635525074e-06, "loss": 0.7774, "step": 1290 }, { "epoch": 4.234910277324633, "grad_norm": 0.0, "learning_rate": 1.2206766432631766e-06, "loss": 0.7848, "step": 1300 }, { "epoch": 4.267536704730832, "grad_norm": 0.0, "learning_rate": 1.121359410337859e-06, "loss": 0.7814, "step": 1310 }, { "epoch": 4.300163132137031, "grad_norm": 0.0, "learning_rate": 1.0260157130864178e-06, "loss": 0.809, "step": 1320 }, { "epoch": 4.33278955954323, "grad_norm": 0.0, "learning_rate": 9.346882215075348e-07, "loss": 0.7976, "step": 1330 }, { "epoch": 4.365415986949429, "grad_norm": 0.0, "learning_rate": 8.474178081922524e-07, "loss": 0.7825, "step": 1340 }, { "epoch": 4.398042414355628, "grad_norm": 0.0, "learning_rate": 7.642435300318906e-07, "loss": 0.7712, "step": 1350 }, { "epoch": 4.430668841761827, "grad_norm": 0.0, "learning_rate": 6.852026107385756e-07, "loss": 0.7711, "step": 1360 }, { "epoch": 4.463295269168026, "grad_norm": 0.0, "learning_rate": 6.103304241862006e-07, "loss": 0.7903, "step": 1370 }, { "epoch": 4.495921696574225, "grad_norm": 0.0, "learning_rate": 5.396604785792281e-07, "loss": 0.7527, "step": 1380 }, { "epoch": 4.528548123980424, "grad_norm": 0.0, "learning_rate": 4.7322440145647905e-07, "loss": 0.7781, "step": 1390 }, { "epoch": 4.561174551386623, "grad_norm": 0.0, "learning_rate": 4.110519255365852e-07, "loss": 0.8016, "step": 1400 }, { "epoch": 4.593800978792823, "grad_norm": 0.0, "learning_rate": 3.531708754114438e-07, "loss": 0.7768, "step": 1410 }, { "epoch": 4.626427406199021, "grad_norm": 0.0, "learning_rate": 2.996071550936319e-07, "loss": 0.7688, "step": 1420 }, { "epoch": 4.6590538336052205, "grad_norm": 0.0, "learning_rate": 2.503847364233614e-07, "loss": 0.8049, "step": 1430 }, { "epoch": 4.691680261011419, "grad_norm": 0.0, "learning_rate": 2.0552564834014797e-07, "loss": 0.7818, "step": 1440 }, { "epoch": 4.724306688417618, "grad_norm": 0.0, "learning_rate": 1.6504996702401243e-07, "loss": 0.7737, "step": 1450 }, { "epoch": 4.756933115823817, "grad_norm": 0.0, "learning_rate": 1.2897580691060506e-07, "loss": 0.8014, "step": 1460 }, { "epoch": 4.789559543230016, "grad_norm": 0.0, "learning_rate": 9.731931258429638e-08, "loss": 0.7563, "step": 1470 }, { "epoch": 4.822185970636215, "grad_norm": 0.0, "learning_rate": 7.009465155285777e-08, "loss": 0.7504, "step": 1480 }, { "epoch": 4.854812398042414, "grad_norm": 0.0, "learning_rate": 4.731400790693785e-08, "loss": 0.7879, "step": 1490 }, { "epoch": 4.887438825448614, "grad_norm": 0.0, "learning_rate": 2.898757686722542e-08, "loss": 0.7755, "step": 1500 }, { "epoch": 4.920065252854813, "grad_norm": 0.0, "learning_rate": 1.5123560221681488e-08, "loss": 0.7803, "step": 1510 }, { "epoch": 4.952691680261012, "grad_norm": 0.0, "learning_rate": 5.728162654927705e-09, "loss": 0.7493, "step": 1520 }, { "epoch": 4.985318107667211, "grad_norm": 0.0, "learning_rate": 8.05588971406479e-10, "loss": 0.7814, "step": 1530 } ], "logging_steps": 10, "max_steps": 1535, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.877356359182975e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }