{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7502206531332745, "eval_steps": 100, "global_step": 3400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011032656663724624, "grad_norm": 5.878592491149902, "learning_rate": 8.810572687224672e-08, "loss": 0.4689, "step": 5 }, { "epoch": 0.002206531332744925, "grad_norm": 5.2574687004089355, "learning_rate": 1.982378854625551e-07, "loss": 0.4759, "step": 10 }, { "epoch": 0.0033097969991173876, "grad_norm": 5.635329723358154, "learning_rate": 3.083700440528635e-07, "loss": 0.466, "step": 15 }, { "epoch": 0.00441306266548985, "grad_norm": 5.272955417633057, "learning_rate": 4.1850220264317185e-07, "loss": 0.4468, "step": 20 }, { "epoch": 0.005516328331862313, "grad_norm": 5.219903945922852, "learning_rate": 5.286343612334802e-07, "loss": 0.4513, "step": 25 }, { "epoch": 0.006619593998234775, "grad_norm": 4.160714626312256, "learning_rate": 6.387665198237886e-07, "loss": 0.4372, "step": 30 }, { "epoch": 0.007722859664607238, "grad_norm": 3.6638221740722656, "learning_rate": 7.48898678414097e-07, "loss": 0.4449, "step": 35 }, { "epoch": 0.0088261253309797, "grad_norm": 3.3206021785736084, "learning_rate": 8.590308370044054e-07, "loss": 0.4125, "step": 40 }, { "epoch": 0.009929390997352162, "grad_norm": 2.711574077606201, "learning_rate": 9.691629955947138e-07, "loss": 0.4156, "step": 45 }, { "epoch": 0.011032656663724626, "grad_norm": 1.6887717247009277, "learning_rate": 1.0792951541850223e-06, "loss": 0.4075, "step": 50 }, { "epoch": 0.012135922330097087, "grad_norm": 1.177046775817871, "learning_rate": 1.1894273127753305e-06, "loss": 0.3647, "step": 55 }, { "epoch": 0.01323918799646955, "grad_norm": 0.8924531936645508, "learning_rate": 1.299559471365639e-06, "loss": 0.3854, "step": 60 }, { "epoch": 0.014342453662842012, "grad_norm": 1.1198736429214478, "learning_rate": 1.4096916299559475e-06, "loss": 0.3644, "step": 65 }, { "epoch": 0.015445719329214475, "grad_norm": 0.6698480248451233, "learning_rate": 1.5198237885462555e-06, "loss": 0.344, "step": 70 }, { "epoch": 0.01654898499558694, "grad_norm": 0.6314343214035034, "learning_rate": 1.629955947136564e-06, "loss": 0.3258, "step": 75 }, { "epoch": 0.0176522506619594, "grad_norm": 0.5537658929824829, "learning_rate": 1.7400881057268722e-06, "loss": 0.3236, "step": 80 }, { "epoch": 0.01875551632833186, "grad_norm": 0.6194472312927246, "learning_rate": 1.8502202643171807e-06, "loss": 0.3219, "step": 85 }, { "epoch": 0.019858781994704325, "grad_norm": 0.4850139915943146, "learning_rate": 1.960352422907489e-06, "loss": 0.3041, "step": 90 }, { "epoch": 0.020962047661076788, "grad_norm": 0.5193836092948914, "learning_rate": 2.0704845814977977e-06, "loss": 0.3198, "step": 95 }, { "epoch": 0.02206531332744925, "grad_norm": 0.49118679761886597, "learning_rate": 2.180616740088106e-06, "loss": 0.3266, "step": 100 }, { "epoch": 0.02206531332744925, "eval_loss": 0.3100859820842743, "eval_runtime": 269.4841, "eval_samples_per_second": 56.638, "eval_steps_per_second": 7.08, "step": 100 }, { "epoch": 0.02316857899382171, "grad_norm": 0.47427237033843994, "learning_rate": 2.290748898678414e-06, "loss": 0.2955, "step": 105 }, { "epoch": 0.024271844660194174, "grad_norm": 0.49798524379730225, "learning_rate": 2.400881057268723e-06, "loss": 0.302, "step": 110 }, { "epoch": 0.025375110326566638, "grad_norm": 0.46623724699020386, "learning_rate": 2.511013215859031e-06, "loss": 0.2971, "step": 115 }, { "epoch": 0.0264783759929391, "grad_norm": 0.4659285247325897, "learning_rate": 2.6211453744493394e-06, "loss": 0.2918, "step": 120 }, { "epoch": 0.02758164165931156, "grad_norm": 0.46458956599235535, "learning_rate": 2.731277533039648e-06, "loss": 0.2819, "step": 125 }, { "epoch": 0.028684907325684024, "grad_norm": 0.5174154043197632, "learning_rate": 2.841409691629956e-06, "loss": 0.2953, "step": 130 }, { "epoch": 0.029788172992056487, "grad_norm": 0.49397629499435425, "learning_rate": 2.9515418502202646e-06, "loss": 0.2932, "step": 135 }, { "epoch": 0.03089143865842895, "grad_norm": 0.4400649964809418, "learning_rate": 3.061674008810573e-06, "loss": 0.2829, "step": 140 }, { "epoch": 0.031994704324801414, "grad_norm": 0.4720049798488617, "learning_rate": 3.1718061674008815e-06, "loss": 0.2905, "step": 145 }, { "epoch": 0.03309796999117388, "grad_norm": 0.46713733673095703, "learning_rate": 3.2819383259911898e-06, "loss": 0.2949, "step": 150 }, { "epoch": 0.03420123565754634, "grad_norm": 0.4691792130470276, "learning_rate": 3.3920704845814985e-06, "loss": 0.2845, "step": 155 }, { "epoch": 0.0353045013239188, "grad_norm": 0.507400393486023, "learning_rate": 3.5022026431718063e-06, "loss": 0.2929, "step": 160 }, { "epoch": 0.03640776699029126, "grad_norm": 0.5042280554771423, "learning_rate": 3.6123348017621146e-06, "loss": 0.2903, "step": 165 }, { "epoch": 0.03751103265666372, "grad_norm": 0.4863748252391815, "learning_rate": 3.7224669603524232e-06, "loss": 0.2871, "step": 170 }, { "epoch": 0.038614298323036186, "grad_norm": 0.510486364364624, "learning_rate": 3.8325991189427315e-06, "loss": 0.2995, "step": 175 }, { "epoch": 0.03971756398940865, "grad_norm": 0.5123398900032043, "learning_rate": 3.94273127753304e-06, "loss": 0.2794, "step": 180 }, { "epoch": 0.04082082965578111, "grad_norm": 0.45075932145118713, "learning_rate": 4.052863436123348e-06, "loss": 0.2827, "step": 185 }, { "epoch": 0.041924095322153576, "grad_norm": 0.4237598180770874, "learning_rate": 4.162995594713657e-06, "loss": 0.2806, "step": 190 }, { "epoch": 0.04302736098852604, "grad_norm": 0.43099313974380493, "learning_rate": 4.273127753303965e-06, "loss": 0.2667, "step": 195 }, { "epoch": 0.0441306266548985, "grad_norm": 0.5144179463386536, "learning_rate": 4.383259911894274e-06, "loss": 0.2748, "step": 200 }, { "epoch": 0.0441306266548985, "eval_loss": 0.28673937916755676, "eval_runtime": 271.0828, "eval_samples_per_second": 56.304, "eval_steps_per_second": 7.038, "step": 200 }, { "epoch": 0.04523389232127096, "grad_norm": 0.4969826340675354, "learning_rate": 4.493392070484582e-06, "loss": 0.2763, "step": 205 }, { "epoch": 0.04633715798764342, "grad_norm": 0.471080482006073, "learning_rate": 4.60352422907489e-06, "loss": 0.2775, "step": 210 }, { "epoch": 0.047440423654015886, "grad_norm": 0.4287184476852417, "learning_rate": 4.7136563876651984e-06, "loss": 0.2694, "step": 215 }, { "epoch": 0.04854368932038835, "grad_norm": 0.5114341974258423, "learning_rate": 4.823788546255507e-06, "loss": 0.2803, "step": 220 }, { "epoch": 0.04964695498676081, "grad_norm": 0.5093077421188354, "learning_rate": 4.933920704845816e-06, "loss": 0.2802, "step": 225 }, { "epoch": 0.050750220653133275, "grad_norm": 0.5257371664047241, "learning_rate": 5.044052863436124e-06, "loss": 0.292, "step": 230 }, { "epoch": 0.05185348631950574, "grad_norm": 0.3984781503677368, "learning_rate": 5.154185022026432e-06, "loss": 0.2659, "step": 235 }, { "epoch": 0.0529567519858782, "grad_norm": 0.4864000082015991, "learning_rate": 5.2643171806167406e-06, "loss": 0.2722, "step": 240 }, { "epoch": 0.054060017652250665, "grad_norm": 0.592187762260437, "learning_rate": 5.374449339207049e-06, "loss": 0.2793, "step": 245 }, { "epoch": 0.05516328331862312, "grad_norm": 0.6081680655479431, "learning_rate": 5.484581497797358e-06, "loss": 0.2723, "step": 250 }, { "epoch": 0.056266548984995585, "grad_norm": 0.4736359715461731, "learning_rate": 5.594713656387666e-06, "loss": 0.2539, "step": 255 }, { "epoch": 0.05736981465136805, "grad_norm": 0.46948790550231934, "learning_rate": 5.704845814977974e-06, "loss": 0.2748, "step": 260 }, { "epoch": 0.05847308031774051, "grad_norm": 0.5093392729759216, "learning_rate": 5.814977973568282e-06, "loss": 0.2614, "step": 265 }, { "epoch": 0.059576345984112974, "grad_norm": 0.5078116655349731, "learning_rate": 5.925110132158591e-06, "loss": 0.2706, "step": 270 }, { "epoch": 0.06067961165048544, "grad_norm": 0.5074042677879333, "learning_rate": 6.035242290748899e-06, "loss": 0.2644, "step": 275 }, { "epoch": 0.0617828773168579, "grad_norm": 0.45398184657096863, "learning_rate": 6.1453744493392075e-06, "loss": 0.2612, "step": 280 }, { "epoch": 0.06288614298323036, "grad_norm": 0.47826600074768066, "learning_rate": 6.255506607929516e-06, "loss": 0.275, "step": 285 }, { "epoch": 0.06398940864960283, "grad_norm": 0.49216607213020325, "learning_rate": 6.365638766519824e-06, "loss": 0.2639, "step": 290 }, { "epoch": 0.06509267431597529, "grad_norm": 0.5131933093070984, "learning_rate": 6.475770925110133e-06, "loss": 0.286, "step": 295 }, { "epoch": 0.06619593998234775, "grad_norm": 0.4883180856704712, "learning_rate": 6.585903083700441e-06, "loss": 0.2813, "step": 300 }, { "epoch": 0.06619593998234775, "eval_loss": 0.2764524519443512, "eval_runtime": 269.5554, "eval_samples_per_second": 56.623, "eval_steps_per_second": 7.078, "step": 300 }, { "epoch": 0.06729920564872022, "grad_norm": 0.47087526321411133, "learning_rate": 6.69603524229075e-06, "loss": 0.2724, "step": 305 }, { "epoch": 0.06840247131509268, "grad_norm": 0.4638593792915344, "learning_rate": 6.806167400881057e-06, "loss": 0.2572, "step": 310 }, { "epoch": 0.06950573698146513, "grad_norm": 0.5100425481796265, "learning_rate": 6.916299559471367e-06, "loss": 0.2814, "step": 315 }, { "epoch": 0.0706090026478376, "grad_norm": 0.505667507648468, "learning_rate": 7.026431718061674e-06, "loss": 0.2672, "step": 320 }, { "epoch": 0.07171226831421006, "grad_norm": 0.4753463864326477, "learning_rate": 7.136563876651983e-06, "loss": 0.2672, "step": 325 }, { "epoch": 0.07281553398058252, "grad_norm": 0.5588636994361877, "learning_rate": 7.246696035242291e-06, "loss": 0.2732, "step": 330 }, { "epoch": 0.07391879964695498, "grad_norm": 0.5111093521118164, "learning_rate": 7.3568281938326e-06, "loss": 0.2643, "step": 335 }, { "epoch": 0.07502206531332745, "grad_norm": 0.49439841508865356, "learning_rate": 7.466960352422908e-06, "loss": 0.2677, "step": 340 }, { "epoch": 0.07612533097969991, "grad_norm": 0.4980764389038086, "learning_rate": 7.5770925110132166e-06, "loss": 0.2577, "step": 345 }, { "epoch": 0.07722859664607237, "grad_norm": 0.5545366406440735, "learning_rate": 7.687224669603525e-06, "loss": 0.2735, "step": 350 }, { "epoch": 0.07833186231244484, "grad_norm": 0.45210951566696167, "learning_rate": 7.797356828193832e-06, "loss": 0.2544, "step": 355 }, { "epoch": 0.0794351279788173, "grad_norm": 0.4653448462486267, "learning_rate": 7.907488986784141e-06, "loss": 0.2815, "step": 360 }, { "epoch": 0.08053839364518976, "grad_norm": 0.4663446247577667, "learning_rate": 8.01762114537445e-06, "loss": 0.2647, "step": 365 }, { "epoch": 0.08164165931156223, "grad_norm": 0.5129761695861816, "learning_rate": 8.127753303964758e-06, "loss": 0.2561, "step": 370 }, { "epoch": 0.08274492497793469, "grad_norm": 0.5074746012687683, "learning_rate": 8.237885462555067e-06, "loss": 0.2642, "step": 375 }, { "epoch": 0.08384819064430715, "grad_norm": 0.48852622509002686, "learning_rate": 8.348017621145376e-06, "loss": 0.2484, "step": 380 }, { "epoch": 0.08495145631067962, "grad_norm": 0.46195775270462036, "learning_rate": 8.458149779735683e-06, "loss": 0.2432, "step": 385 }, { "epoch": 0.08605472197705208, "grad_norm": 0.5792168974876404, "learning_rate": 8.568281938325993e-06, "loss": 0.2711, "step": 390 }, { "epoch": 0.08715798764342454, "grad_norm": 0.57877516746521, "learning_rate": 8.6784140969163e-06, "loss": 0.2758, "step": 395 }, { "epoch": 0.088261253309797, "grad_norm": 0.4454537332057953, "learning_rate": 8.788546255506607e-06, "loss": 0.269, "step": 400 }, { "epoch": 0.088261253309797, "eval_loss": 0.27229130268096924, "eval_runtime": 269.3494, "eval_samples_per_second": 56.666, "eval_steps_per_second": 7.084, "step": 400 }, { "epoch": 0.08936451897616945, "grad_norm": 0.5199728608131409, "learning_rate": 8.898678414096917e-06, "loss": 0.2594, "step": 405 }, { "epoch": 0.09046778464254192, "grad_norm": 0.4717728793621063, "learning_rate": 9.008810572687226e-06, "loss": 0.267, "step": 410 }, { "epoch": 0.09157105030891438, "grad_norm": 0.4550599157810211, "learning_rate": 9.118942731277533e-06, "loss": 0.254, "step": 415 }, { "epoch": 0.09267431597528684, "grad_norm": 0.5019270777702332, "learning_rate": 9.229074889867842e-06, "loss": 0.263, "step": 420 }, { "epoch": 0.09377758164165931, "grad_norm": 0.5012001991271973, "learning_rate": 9.339207048458151e-06, "loss": 0.2588, "step": 425 }, { "epoch": 0.09488084730803177, "grad_norm": 0.5489994883537292, "learning_rate": 9.449339207048459e-06, "loss": 0.2684, "step": 430 }, { "epoch": 0.09598411297440423, "grad_norm": 0.5029094219207764, "learning_rate": 9.559471365638768e-06, "loss": 0.2635, "step": 435 }, { "epoch": 0.0970873786407767, "grad_norm": 0.5363909006118774, "learning_rate": 9.669603524229075e-06, "loss": 0.2572, "step": 440 }, { "epoch": 0.09819064430714916, "grad_norm": 0.6020154356956482, "learning_rate": 9.779735682819384e-06, "loss": 0.2554, "step": 445 }, { "epoch": 0.09929390997352162, "grad_norm": 0.5789384841918945, "learning_rate": 9.889867841409693e-06, "loss": 0.2814, "step": 450 }, { "epoch": 0.10039717563989409, "grad_norm": 0.5141489505767822, "learning_rate": 1e-05, "loss": 0.2498, "step": 455 }, { "epoch": 0.10150044130626655, "grad_norm": 0.5430559515953064, "learning_rate": 9.99999167904182e-06, "loss": 0.276, "step": 460 }, { "epoch": 0.10260370697263901, "grad_norm": 0.5350551009178162, "learning_rate": 9.999966716194973e-06, "loss": 0.2364, "step": 465 }, { "epoch": 0.10370697263901148, "grad_norm": 0.5607656836509705, "learning_rate": 9.999925111542544e-06, "loss": 0.2599, "step": 470 }, { "epoch": 0.10481023830538394, "grad_norm": 0.4968941807746887, "learning_rate": 9.99986686522301e-06, "loss": 0.262, "step": 475 }, { "epoch": 0.1059135039717564, "grad_norm": 0.49971750378608704, "learning_rate": 9.999791977430238e-06, "loss": 0.2642, "step": 480 }, { "epoch": 0.10701676963812887, "grad_norm": 0.48582854866981506, "learning_rate": 9.999700448413483e-06, "loss": 0.252, "step": 485 }, { "epoch": 0.10812003530450133, "grad_norm": 0.5446631908416748, "learning_rate": 9.999592278477389e-06, "loss": 0.2652, "step": 490 }, { "epoch": 0.10922330097087378, "grad_norm": 0.4594772160053253, "learning_rate": 9.999467467981984e-06, "loss": 0.253, "step": 495 }, { "epoch": 0.11032656663724624, "grad_norm": 0.5003575682640076, "learning_rate": 9.999326017342688e-06, "loss": 0.2629, "step": 500 }, { "epoch": 0.11032656663724624, "eval_loss": 0.26893937587738037, "eval_runtime": 268.2711, "eval_samples_per_second": 56.894, "eval_steps_per_second": 7.112, "step": 500 }, { "epoch": 0.1114298323036187, "grad_norm": 0.4908248484134674, "learning_rate": 9.999167927030304e-06, "loss": 0.2577, "step": 505 }, { "epoch": 0.11253309796999117, "grad_norm": 0.6282269358634949, "learning_rate": 9.998993197571014e-06, "loss": 0.2714, "step": 510 }, { "epoch": 0.11363636363636363, "grad_norm": 0.4572107195854187, "learning_rate": 9.998801829546387e-06, "loss": 0.2469, "step": 515 }, { "epoch": 0.1147396293027361, "grad_norm": 0.5121334195137024, "learning_rate": 9.99859382359337e-06, "loss": 0.2657, "step": 520 }, { "epoch": 0.11584289496910856, "grad_norm": 0.4956417679786682, "learning_rate": 9.998369180404283e-06, "loss": 0.2435, "step": 525 }, { "epoch": 0.11694616063548102, "grad_norm": 0.6356124877929688, "learning_rate": 9.998127900726825e-06, "loss": 0.2694, "step": 530 }, { "epoch": 0.11804942630185349, "grad_norm": 0.5022862553596497, "learning_rate": 9.997869985364073e-06, "loss": 0.2655, "step": 535 }, { "epoch": 0.11915269196822595, "grad_norm": 0.47873038053512573, "learning_rate": 9.997595435174461e-06, "loss": 0.2704, "step": 540 }, { "epoch": 0.12025595763459841, "grad_norm": 0.46941813826560974, "learning_rate": 9.997304251071802e-06, "loss": 0.2594, "step": 545 }, { "epoch": 0.12135922330097088, "grad_norm": 0.47806182503700256, "learning_rate": 9.996996434025264e-06, "loss": 0.2597, "step": 550 }, { "epoch": 0.12246248896734334, "grad_norm": 0.666239857673645, "learning_rate": 9.996671985059384e-06, "loss": 0.2722, "step": 555 }, { "epoch": 0.1235657546337158, "grad_norm": 0.40519818663597107, "learning_rate": 9.99633090525405e-06, "loss": 0.2483, "step": 560 }, { "epoch": 0.12466902030008827, "grad_norm": 0.5072190165519714, "learning_rate": 9.99597319574451e-06, "loss": 0.2528, "step": 565 }, { "epoch": 0.12577228596646073, "grad_norm": 0.4706517159938812, "learning_rate": 9.995598857721354e-06, "loss": 0.2628, "step": 570 }, { "epoch": 0.12687555163283318, "grad_norm": 0.46087875962257385, "learning_rate": 9.995207892430525e-06, "loss": 0.2537, "step": 575 }, { "epoch": 0.12797881729920565, "grad_norm": 0.5278568863868713, "learning_rate": 9.994800301173303e-06, "loss": 0.2687, "step": 580 }, { "epoch": 0.1290820829655781, "grad_norm": 0.5285748839378357, "learning_rate": 9.994376085306309e-06, "loss": 0.2647, "step": 585 }, { "epoch": 0.13018534863195058, "grad_norm": 0.5125618577003479, "learning_rate": 9.9939352462415e-06, "loss": 0.251, "step": 590 }, { "epoch": 0.13128861429832303, "grad_norm": 0.64532870054245, "learning_rate": 9.993477785446151e-06, "loss": 0.2574, "step": 595 }, { "epoch": 0.1323918799646955, "grad_norm": 0.49944519996643066, "learning_rate": 9.99300370444287e-06, "loss": 0.2495, "step": 600 }, { "epoch": 0.1323918799646955, "eval_loss": 0.2665667235851288, "eval_runtime": 268.5635, "eval_samples_per_second": 56.832, "eval_steps_per_second": 7.104, "step": 600 }, { "epoch": 0.13349514563106796, "grad_norm": 0.4549316465854645, "learning_rate": 9.99251300480958e-06, "loss": 0.2765, "step": 605 }, { "epoch": 0.13459841129744043, "grad_norm": 0.5687413215637207, "learning_rate": 9.992005688179518e-06, "loss": 0.2729, "step": 610 }, { "epoch": 0.13570167696381288, "grad_norm": 0.5213468074798584, "learning_rate": 9.991481756241228e-06, "loss": 0.2536, "step": 615 }, { "epoch": 0.13680494263018536, "grad_norm": 0.5374130606651306, "learning_rate": 9.990941210738553e-06, "loss": 0.2629, "step": 620 }, { "epoch": 0.1379082082965578, "grad_norm": 0.4314231276512146, "learning_rate": 9.99038405347064e-06, "loss": 0.2538, "step": 625 }, { "epoch": 0.13901147396293026, "grad_norm": 0.4665580093860626, "learning_rate": 9.989810286291923e-06, "loss": 0.2538, "step": 630 }, { "epoch": 0.14011473962930274, "grad_norm": 0.5242295861244202, "learning_rate": 9.989219911112114e-06, "loss": 0.2633, "step": 635 }, { "epoch": 0.1412180052956752, "grad_norm": 0.49017295241355896, "learning_rate": 9.988612929896211e-06, "loss": 0.2678, "step": 640 }, { "epoch": 0.14232127096204766, "grad_norm": 0.5350978970527649, "learning_rate": 9.987989344664479e-06, "loss": 0.2686, "step": 645 }, { "epoch": 0.1434245366284201, "grad_norm": 0.5256524085998535, "learning_rate": 9.98734915749245e-06, "loss": 0.2623, "step": 650 }, { "epoch": 0.1445278022947926, "grad_norm": 0.5268611907958984, "learning_rate": 9.98669237051091e-06, "loss": 0.2545, "step": 655 }, { "epoch": 0.14563106796116504, "grad_norm": 0.4854044020175934, "learning_rate": 9.986018985905901e-06, "loss": 0.2624, "step": 660 }, { "epoch": 0.14673433362753752, "grad_norm": 0.6195608973503113, "learning_rate": 9.985329005918702e-06, "loss": 0.26, "step": 665 }, { "epoch": 0.14783759929390997, "grad_norm": 0.5011169910430908, "learning_rate": 9.984622432845835e-06, "loss": 0.2468, "step": 670 }, { "epoch": 0.14894086496028244, "grad_norm": 0.5306973457336426, "learning_rate": 9.98389926903904e-06, "loss": 0.2478, "step": 675 }, { "epoch": 0.1500441306266549, "grad_norm": 0.5306282043457031, "learning_rate": 9.983159516905287e-06, "loss": 0.2589, "step": 680 }, { "epoch": 0.15114739629302737, "grad_norm": 0.47503378987312317, "learning_rate": 9.982403178906755e-06, "loss": 0.2467, "step": 685 }, { "epoch": 0.15225066195939982, "grad_norm": 0.548812985420227, "learning_rate": 9.981630257560825e-06, "loss": 0.2649, "step": 690 }, { "epoch": 0.1533539276257723, "grad_norm": 0.5385993719100952, "learning_rate": 9.980840755440075e-06, "loss": 0.2507, "step": 695 }, { "epoch": 0.15445719329214475, "grad_norm": 0.5004397034645081, "learning_rate": 9.980034675172274e-06, "loss": 0.2648, "step": 700 }, { "epoch": 0.15445719329214475, "eval_loss": 0.2640049159526825, "eval_runtime": 273.2064, "eval_samples_per_second": 55.866, "eval_steps_per_second": 6.984, "step": 700 }, { "epoch": 0.15556045895851722, "grad_norm": 0.5281751155853271, "learning_rate": 9.979212019440364e-06, "loss": 0.2598, "step": 705 }, { "epoch": 0.15666372462488967, "grad_norm": 0.41343384981155396, "learning_rate": 9.978372790982457e-06, "loss": 0.2502, "step": 710 }, { "epoch": 0.15776699029126215, "grad_norm": 0.4603078365325928, "learning_rate": 9.977516992591832e-06, "loss": 0.2716, "step": 715 }, { "epoch": 0.1588702559576346, "grad_norm": 0.47830113768577576, "learning_rate": 9.976644627116906e-06, "loss": 0.2532, "step": 720 }, { "epoch": 0.15997352162400705, "grad_norm": 0.4745546281337738, "learning_rate": 9.975755697461254e-06, "loss": 0.2602, "step": 725 }, { "epoch": 0.16107678729037953, "grad_norm": 0.47886019945144653, "learning_rate": 9.97485020658357e-06, "loss": 0.273, "step": 730 }, { "epoch": 0.16218005295675197, "grad_norm": 0.7469276189804077, "learning_rate": 9.973928157497675e-06, "loss": 0.2631, "step": 735 }, { "epoch": 0.16328331862312445, "grad_norm": 0.520545482635498, "learning_rate": 9.972989553272501e-06, "loss": 0.2506, "step": 740 }, { "epoch": 0.1643865842894969, "grad_norm": 0.428124338388443, "learning_rate": 9.972034397032086e-06, "loss": 0.2482, "step": 745 }, { "epoch": 0.16548984995586938, "grad_norm": 0.5379740595817566, "learning_rate": 9.971062691955553e-06, "loss": 0.2557, "step": 750 }, { "epoch": 0.16659311562224183, "grad_norm": 0.5184879899024963, "learning_rate": 9.970074441277111e-06, "loss": 0.2587, "step": 755 }, { "epoch": 0.1676963812886143, "grad_norm": 0.4643745422363281, "learning_rate": 9.969069648286034e-06, "loss": 0.2538, "step": 760 }, { "epoch": 0.16879964695498675, "grad_norm": 0.48147132992744446, "learning_rate": 9.968048316326661e-06, "loss": 0.2534, "step": 765 }, { "epoch": 0.16990291262135923, "grad_norm": 0.5775375366210938, "learning_rate": 9.967010448798376e-06, "loss": 0.2659, "step": 770 }, { "epoch": 0.17100617828773168, "grad_norm": 0.5497225522994995, "learning_rate": 9.9659560491556e-06, "loss": 0.2464, "step": 775 }, { "epoch": 0.17210944395410416, "grad_norm": 0.4842096269130707, "learning_rate": 9.964885120907777e-06, "loss": 0.2341, "step": 780 }, { "epoch": 0.1732127096204766, "grad_norm": 0.5398717522621155, "learning_rate": 9.963797667619368e-06, "loss": 0.2585, "step": 785 }, { "epoch": 0.17431597528684908, "grad_norm": 0.5121772289276123, "learning_rate": 9.962693692909834e-06, "loss": 0.2677, "step": 790 }, { "epoch": 0.17541924095322153, "grad_norm": 0.5982369184494019, "learning_rate": 9.961573200453627e-06, "loss": 0.2572, "step": 795 }, { "epoch": 0.176522506619594, "grad_norm": 0.527195394039154, "learning_rate": 9.960436193980175e-06, "loss": 0.2503, "step": 800 }, { "epoch": 0.176522506619594, "eval_loss": 0.2625011205673218, "eval_runtime": 269.8781, "eval_samples_per_second": 56.555, "eval_steps_per_second": 7.07, "step": 800 }, { "epoch": 0.17762577228596646, "grad_norm": 0.5573858022689819, "learning_rate": 9.959282677273869e-06, "loss": 0.266, "step": 805 }, { "epoch": 0.1787290379523389, "grad_norm": 0.4883844554424286, "learning_rate": 9.958112654174058e-06, "loss": 0.2572, "step": 810 }, { "epoch": 0.1798323036187114, "grad_norm": 0.46676844358444214, "learning_rate": 9.956926128575026e-06, "loss": 0.2459, "step": 815 }, { "epoch": 0.18093556928508384, "grad_norm": 0.4161823093891144, "learning_rate": 9.955723104425986e-06, "loss": 0.2411, "step": 820 }, { "epoch": 0.1820388349514563, "grad_norm": 0.45280569791793823, "learning_rate": 9.954503585731061e-06, "loss": 0.2586, "step": 825 }, { "epoch": 0.18314210061782876, "grad_norm": 0.43848279118537903, "learning_rate": 9.953267576549279e-06, "loss": 0.2464, "step": 830 }, { "epoch": 0.18424536628420124, "grad_norm": 0.48168620467185974, "learning_rate": 9.95201508099455e-06, "loss": 0.2512, "step": 835 }, { "epoch": 0.1853486319505737, "grad_norm": 0.5931830406188965, "learning_rate": 9.950746103235663e-06, "loss": 0.2526, "step": 840 }, { "epoch": 0.18645189761694617, "grad_norm": 0.4570105969905853, "learning_rate": 9.949460647496258e-06, "loss": 0.2457, "step": 845 }, { "epoch": 0.18755516328331862, "grad_norm": 0.4142732322216034, "learning_rate": 9.948158718054828e-06, "loss": 0.2441, "step": 850 }, { "epoch": 0.1886584289496911, "grad_norm": 0.48708921670913696, "learning_rate": 9.94684031924469e-06, "loss": 0.2577, "step": 855 }, { "epoch": 0.18976169461606354, "grad_norm": 0.5016698241233826, "learning_rate": 9.945505455453983e-06, "loss": 0.2562, "step": 860 }, { "epoch": 0.19086496028243602, "grad_norm": 0.5526902675628662, "learning_rate": 9.944154131125643e-06, "loss": 0.255, "step": 865 }, { "epoch": 0.19196822594880847, "grad_norm": 0.526472806930542, "learning_rate": 9.942786350757398e-06, "loss": 0.2659, "step": 870 }, { "epoch": 0.19307149161518095, "grad_norm": 0.5003820061683655, "learning_rate": 9.941402118901743e-06, "loss": 0.2565, "step": 875 }, { "epoch": 0.1941747572815534, "grad_norm": 0.5030418038368225, "learning_rate": 9.940001440165934e-06, "loss": 0.2628, "step": 880 }, { "epoch": 0.19527802294792587, "grad_norm": 0.47498998045921326, "learning_rate": 9.938584319211965e-06, "loss": 0.2744, "step": 885 }, { "epoch": 0.19638128861429832, "grad_norm": 0.5270429253578186, "learning_rate": 9.93715076075656e-06, "loss": 0.2561, "step": 890 }, { "epoch": 0.1974845542806708, "grad_norm": 0.5205044150352478, "learning_rate": 9.935700769571148e-06, "loss": 0.2449, "step": 895 }, { "epoch": 0.19858781994704325, "grad_norm": 0.41483354568481445, "learning_rate": 9.934234350481856e-06, "loss": 0.2595, "step": 900 }, { "epoch": 0.19858781994704325, "eval_loss": 0.26145488023757935, "eval_runtime": 273.4791, "eval_samples_per_second": 55.81, "eval_steps_per_second": 6.977, "step": 900 }, { "epoch": 0.1996910856134157, "grad_norm": 0.5100669860839844, "learning_rate": 9.932751508369492e-06, "loss": 0.2485, "step": 905 }, { "epoch": 0.20079435127978817, "grad_norm": 0.4988202154636383, "learning_rate": 9.931252248169518e-06, "loss": 0.2555, "step": 910 }, { "epoch": 0.20189761694616062, "grad_norm": 0.49361705780029297, "learning_rate": 9.929736574872052e-06, "loss": 0.2579, "step": 915 }, { "epoch": 0.2030008826125331, "grad_norm": 0.4196428060531616, "learning_rate": 9.92820449352183e-06, "loss": 0.2456, "step": 920 }, { "epoch": 0.20410414827890555, "grad_norm": 0.425731897354126, "learning_rate": 9.926656009218208e-06, "loss": 0.2457, "step": 925 }, { "epoch": 0.20520741394527803, "grad_norm": 0.4996449649333954, "learning_rate": 9.925091127115139e-06, "loss": 0.2689, "step": 930 }, { "epoch": 0.20631067961165048, "grad_norm": 0.4646408259868622, "learning_rate": 9.923509852421144e-06, "loss": 0.2429, "step": 935 }, { "epoch": 0.20741394527802295, "grad_norm": 0.5681747794151306, "learning_rate": 9.921912190399317e-06, "loss": 0.2581, "step": 940 }, { "epoch": 0.2085172109443954, "grad_norm": 0.45096200704574585, "learning_rate": 9.920298146367287e-06, "loss": 0.2465, "step": 945 }, { "epoch": 0.20962047661076788, "grad_norm": 0.4459875226020813, "learning_rate": 9.91866772569721e-06, "loss": 0.2593, "step": 950 }, { "epoch": 0.21072374227714033, "grad_norm": 0.4605613946914673, "learning_rate": 9.917020933815753e-06, "loss": 0.2646, "step": 955 }, { "epoch": 0.2118270079435128, "grad_norm": 0.5199949741363525, "learning_rate": 9.91535777620407e-06, "loss": 0.2572, "step": 960 }, { "epoch": 0.21293027360988526, "grad_norm": 0.42653989791870117, "learning_rate": 9.913678258397785e-06, "loss": 0.2547, "step": 965 }, { "epoch": 0.21403353927625773, "grad_norm": 0.5204625725746155, "learning_rate": 9.91198238598698e-06, "loss": 0.2407, "step": 970 }, { "epoch": 0.21513680494263018, "grad_norm": 0.4516238272190094, "learning_rate": 9.910270164616168e-06, "loss": 0.2531, "step": 975 }, { "epoch": 0.21624007060900266, "grad_norm": 0.4373854696750641, "learning_rate": 9.908541599984276e-06, "loss": 0.2495, "step": 980 }, { "epoch": 0.2173433362753751, "grad_norm": 0.5143552422523499, "learning_rate": 9.90679669784463e-06, "loss": 0.2496, "step": 985 }, { "epoch": 0.21844660194174756, "grad_norm": 0.41742590069770813, "learning_rate": 9.905035464004935e-06, "loss": 0.2481, "step": 990 }, { "epoch": 0.21954986760812004, "grad_norm": 0.46620362997055054, "learning_rate": 9.90325790432725e-06, "loss": 0.2625, "step": 995 }, { "epoch": 0.22065313327449249, "grad_norm": 0.4866413176059723, "learning_rate": 9.901464024727976e-06, "loss": 0.247, "step": 1000 }, { "epoch": 0.22065313327449249, "eval_loss": 0.25996777415275574, "eval_runtime": 273.2634, "eval_samples_per_second": 55.855, "eval_steps_per_second": 6.982, "step": 1000 }, { "epoch": 0.22175639894086496, "grad_norm": 0.4647798240184784, "learning_rate": 9.899653831177831e-06, "loss": 0.2528, "step": 1005 }, { "epoch": 0.2228596646072374, "grad_norm": 0.4932115972042084, "learning_rate": 9.897827329701834e-06, "loss": 0.2544, "step": 1010 }, { "epoch": 0.2239629302736099, "grad_norm": 0.4925852417945862, "learning_rate": 9.895984526379282e-06, "loss": 0.2621, "step": 1015 }, { "epoch": 0.22506619593998234, "grad_norm": 0.5298591256141663, "learning_rate": 9.89412542734373e-06, "loss": 0.2542, "step": 1020 }, { "epoch": 0.22616946160635482, "grad_norm": 0.5149207711219788, "learning_rate": 9.892250038782972e-06, "loss": 0.2579, "step": 1025 }, { "epoch": 0.22727272727272727, "grad_norm": 0.45972946286201477, "learning_rate": 9.890358366939021e-06, "loss": 0.2534, "step": 1030 }, { "epoch": 0.22837599293909974, "grad_norm": 0.4157005846500397, "learning_rate": 9.888450418108085e-06, "loss": 0.243, "step": 1035 }, { "epoch": 0.2294792586054722, "grad_norm": 0.39558079838752747, "learning_rate": 9.88652619864055e-06, "loss": 0.2461, "step": 1040 }, { "epoch": 0.23058252427184467, "grad_norm": 0.47637176513671875, "learning_rate": 9.884585714940953e-06, "loss": 0.2353, "step": 1045 }, { "epoch": 0.23168578993821712, "grad_norm": 0.5233368277549744, "learning_rate": 9.882628973467972e-06, "loss": 0.2536, "step": 1050 }, { "epoch": 0.2327890556045896, "grad_norm": 0.4879682660102844, "learning_rate": 9.880655980734391e-06, "loss": 0.2611, "step": 1055 }, { "epoch": 0.23389232127096204, "grad_norm": 0.4481244385242462, "learning_rate": 9.878666743307083e-06, "loss": 0.2549, "step": 1060 }, { "epoch": 0.23499558693733452, "grad_norm": 0.43410834670066833, "learning_rate": 9.876661267806995e-06, "loss": 0.2589, "step": 1065 }, { "epoch": 0.23609885260370697, "grad_norm": 0.6263585686683655, "learning_rate": 9.874639560909118e-06, "loss": 0.2471, "step": 1070 }, { "epoch": 0.23720211827007945, "grad_norm": 0.47275134921073914, "learning_rate": 9.872601629342468e-06, "loss": 0.2575, "step": 1075 }, { "epoch": 0.2383053839364519, "grad_norm": 0.5037277340888977, "learning_rate": 9.870547479890062e-06, "loss": 0.2549, "step": 1080 }, { "epoch": 0.23940864960282435, "grad_norm": 0.5256139039993286, "learning_rate": 9.868477119388897e-06, "loss": 0.2574, "step": 1085 }, { "epoch": 0.24051191526919682, "grad_norm": 0.46220555901527405, "learning_rate": 9.866390554729923e-06, "loss": 0.257, "step": 1090 }, { "epoch": 0.24161518093556927, "grad_norm": 0.48123809695243835, "learning_rate": 9.864287792858032e-06, "loss": 0.2437, "step": 1095 }, { "epoch": 0.24271844660194175, "grad_norm": 0.5462665557861328, "learning_rate": 9.862168840772018e-06, "loss": 0.2454, "step": 1100 }, { "epoch": 0.24271844660194175, "eval_loss": 0.25873637199401855, "eval_runtime": 271.9106, "eval_samples_per_second": 56.132, "eval_steps_per_second": 7.017, "step": 1100 }, { "epoch": 0.2438217122683142, "grad_norm": 0.5108821392059326, "learning_rate": 9.860033705524566e-06, "loss": 0.247, "step": 1105 }, { "epoch": 0.24492497793468668, "grad_norm": 0.47107893228530884, "learning_rate": 9.857882394222225e-06, "loss": 0.2546, "step": 1110 }, { "epoch": 0.24602824360105913, "grad_norm": 0.4935952425003052, "learning_rate": 9.855714914025386e-06, "loss": 0.247, "step": 1115 }, { "epoch": 0.2471315092674316, "grad_norm": 0.5136795043945312, "learning_rate": 9.853531272148248e-06, "loss": 0.2615, "step": 1120 }, { "epoch": 0.24823477493380405, "grad_norm": 0.5249958634376526, "learning_rate": 9.851331475858813e-06, "loss": 0.2619, "step": 1125 }, { "epoch": 0.24933804060017653, "grad_norm": 0.4954059422016144, "learning_rate": 9.849115532478848e-06, "loss": 0.2473, "step": 1130 }, { "epoch": 0.250441306266549, "grad_norm": 0.47944945096969604, "learning_rate": 9.846883449383854e-06, "loss": 0.2566, "step": 1135 }, { "epoch": 0.25154457193292146, "grad_norm": 0.5183018445968628, "learning_rate": 9.844635234003067e-06, "loss": 0.2629, "step": 1140 }, { "epoch": 0.25264783759929393, "grad_norm": 0.4572855830192566, "learning_rate": 9.842370893819404e-06, "loss": 0.2593, "step": 1145 }, { "epoch": 0.25375110326566636, "grad_norm": 0.4775985777378082, "learning_rate": 9.840090436369458e-06, "loss": 0.2354, "step": 1150 }, { "epoch": 0.25485436893203883, "grad_norm": 0.48503291606903076, "learning_rate": 9.837793869243468e-06, "loss": 0.2483, "step": 1155 }, { "epoch": 0.2559576345984113, "grad_norm": 0.46030426025390625, "learning_rate": 9.83548120008529e-06, "loss": 0.2616, "step": 1160 }, { "epoch": 0.2570609002647838, "grad_norm": 0.5037588477134705, "learning_rate": 9.83315243659237e-06, "loss": 0.2488, "step": 1165 }, { "epoch": 0.2581641659311562, "grad_norm": 0.508270263671875, "learning_rate": 9.830807586515726e-06, "loss": 0.2579, "step": 1170 }, { "epoch": 0.2592674315975287, "grad_norm": 0.4799206554889679, "learning_rate": 9.828446657659919e-06, "loss": 0.25, "step": 1175 }, { "epoch": 0.26037069726390116, "grad_norm": 0.531873881816864, "learning_rate": 9.826069657883027e-06, "loss": 0.2467, "step": 1180 }, { "epoch": 0.2614739629302736, "grad_norm": 0.5633664727210999, "learning_rate": 9.823676595096612e-06, "loss": 0.2595, "step": 1185 }, { "epoch": 0.26257722859664606, "grad_norm": 0.5257665514945984, "learning_rate": 9.821267477265705e-06, "loss": 0.2662, "step": 1190 }, { "epoch": 0.26368049426301854, "grad_norm": 0.5463647246360779, "learning_rate": 9.818842312408776e-06, "loss": 0.2478, "step": 1195 }, { "epoch": 0.264783759929391, "grad_norm": 0.4790140986442566, "learning_rate": 9.816401108597704e-06, "loss": 0.2516, "step": 1200 }, { "epoch": 0.264783759929391, "eval_loss": 0.25740158557891846, "eval_runtime": 274.3768, "eval_samples_per_second": 55.628, "eval_steps_per_second": 6.954, "step": 1200 }, { "epoch": 0.26588702559576344, "grad_norm": 0.44939637184143066, "learning_rate": 9.813943873957748e-06, "loss": 0.2568, "step": 1205 }, { "epoch": 0.2669902912621359, "grad_norm": 0.44032007455825806, "learning_rate": 9.811470616667525e-06, "loss": 0.2598, "step": 1210 }, { "epoch": 0.2680935569285084, "grad_norm": 0.4683074951171875, "learning_rate": 9.808981344958988e-06, "loss": 0.2468, "step": 1215 }, { "epoch": 0.26919682259488087, "grad_norm": 0.46099165081977844, "learning_rate": 9.806476067117384e-06, "loss": 0.2597, "step": 1220 }, { "epoch": 0.2703000882612533, "grad_norm": 0.47137463092803955, "learning_rate": 9.803954791481239e-06, "loss": 0.2564, "step": 1225 }, { "epoch": 0.27140335392762577, "grad_norm": 0.41110455989837646, "learning_rate": 9.801417526442326e-06, "loss": 0.256, "step": 1230 }, { "epoch": 0.27250661959399824, "grad_norm": 0.4750699996948242, "learning_rate": 9.798864280445633e-06, "loss": 0.2461, "step": 1235 }, { "epoch": 0.2736098852603707, "grad_norm": 0.4262714684009552, "learning_rate": 9.79629506198934e-06, "loss": 0.2611, "step": 1240 }, { "epoch": 0.27471315092674314, "grad_norm": 0.4699675738811493, "learning_rate": 9.793709879624797e-06, "loss": 0.2454, "step": 1245 }, { "epoch": 0.2758164165931156, "grad_norm": 0.4742600619792938, "learning_rate": 9.791108741956476e-06, "loss": 0.2583, "step": 1250 }, { "epoch": 0.2769196822594881, "grad_norm": 0.4389561414718628, "learning_rate": 9.78849165764196e-06, "loss": 0.24, "step": 1255 }, { "epoch": 0.2780229479258605, "grad_norm": 0.4927821457386017, "learning_rate": 9.785858635391913e-06, "loss": 0.2527, "step": 1260 }, { "epoch": 0.279126213592233, "grad_norm": 0.38495415449142456, "learning_rate": 9.78320968397004e-06, "loss": 0.2411, "step": 1265 }, { "epoch": 0.2802294792586055, "grad_norm": 0.4532706141471863, "learning_rate": 9.780544812193065e-06, "loss": 0.234, "step": 1270 }, { "epoch": 0.28133274492497795, "grad_norm": 0.48407411575317383, "learning_rate": 9.777864028930705e-06, "loss": 0.2599, "step": 1275 }, { "epoch": 0.2824360105913504, "grad_norm": 0.47091105580329895, "learning_rate": 9.77516734310563e-06, "loss": 0.2445, "step": 1280 }, { "epoch": 0.28353927625772285, "grad_norm": 0.5425460934638977, "learning_rate": 9.772454763693453e-06, "loss": 0.2499, "step": 1285 }, { "epoch": 0.2846425419240953, "grad_norm": 0.43206480145454407, "learning_rate": 9.769726299722668e-06, "loss": 0.2539, "step": 1290 }, { "epoch": 0.2857458075904678, "grad_norm": 0.49715983867645264, "learning_rate": 9.766981960274653e-06, "loss": 0.2526, "step": 1295 }, { "epoch": 0.2868490732568402, "grad_norm": 0.4886232018470764, "learning_rate": 9.764221754483623e-06, "loss": 0.2496, "step": 1300 }, { "epoch": 0.2868490732568402, "eval_loss": 0.2564772367477417, "eval_runtime": 269.851, "eval_samples_per_second": 56.561, "eval_steps_per_second": 7.071, "step": 1300 }, { "epoch": 0.2879523389232127, "grad_norm": 0.4968324601650238, "learning_rate": 9.761445691536598e-06, "loss": 0.2526, "step": 1305 }, { "epoch": 0.2890556045895852, "grad_norm": 0.48738226294517517, "learning_rate": 9.758653780673381e-06, "loss": 0.243, "step": 1310 }, { "epoch": 0.29015887025595766, "grad_norm": 0.45023027062416077, "learning_rate": 9.755846031186521e-06, "loss": 0.2463, "step": 1315 }, { "epoch": 0.2912621359223301, "grad_norm": 0.5096351504325867, "learning_rate": 9.753022452421286e-06, "loss": 0.2522, "step": 1320 }, { "epoch": 0.29236540158870256, "grad_norm": 0.4321053922176361, "learning_rate": 9.750183053775625e-06, "loss": 0.2482, "step": 1325 }, { "epoch": 0.29346866725507503, "grad_norm": 0.48243576288223267, "learning_rate": 9.747327844700147e-06, "loss": 0.2583, "step": 1330 }, { "epoch": 0.2945719329214475, "grad_norm": 0.5312182903289795, "learning_rate": 9.744456834698083e-06, "loss": 0.2437, "step": 1335 }, { "epoch": 0.29567519858781993, "grad_norm": 0.46811169385910034, "learning_rate": 9.741570033325254e-06, "loss": 0.2387, "step": 1340 }, { "epoch": 0.2967784642541924, "grad_norm": 0.4737708568572998, "learning_rate": 9.738667450190041e-06, "loss": 0.2715, "step": 1345 }, { "epoch": 0.2978817299205649, "grad_norm": 0.4285770058631897, "learning_rate": 9.73574909495335e-06, "loss": 0.2318, "step": 1350 }, { "epoch": 0.2989849955869373, "grad_norm": 0.42456915974617004, "learning_rate": 9.732814977328593e-06, "loss": 0.2534, "step": 1355 }, { "epoch": 0.3000882612533098, "grad_norm": 0.4388004243373871, "learning_rate": 9.729865107081631e-06, "loss": 0.2494, "step": 1360 }, { "epoch": 0.30119152691968226, "grad_norm": 0.48463258147239685, "learning_rate": 9.726899494030768e-06, "loss": 0.2542, "step": 1365 }, { "epoch": 0.30229479258605474, "grad_norm": 0.4798240661621094, "learning_rate": 9.723918148046696e-06, "loss": 0.2485, "step": 1370 }, { "epoch": 0.30339805825242716, "grad_norm": 0.5145127177238464, "learning_rate": 9.720921079052483e-06, "loss": 0.2463, "step": 1375 }, { "epoch": 0.30450132391879964, "grad_norm": 0.4174281358718872, "learning_rate": 9.717908297023517e-06, "loss": 0.2394, "step": 1380 }, { "epoch": 0.3056045895851721, "grad_norm": 0.4736640155315399, "learning_rate": 9.714879811987496e-06, "loss": 0.2474, "step": 1385 }, { "epoch": 0.3067078552515446, "grad_norm": 0.46315228939056396, "learning_rate": 9.711835634024378e-06, "loss": 0.2482, "step": 1390 }, { "epoch": 0.307811120917917, "grad_norm": 0.541100800037384, "learning_rate": 9.708775773266353e-06, "loss": 0.25, "step": 1395 }, { "epoch": 0.3089143865842895, "grad_norm": 0.4666937589645386, "learning_rate": 9.705700239897809e-06, "loss": 0.239, "step": 1400 }, { "epoch": 0.3089143865842895, "eval_loss": 0.25553634762763977, "eval_runtime": 271.0024, "eval_samples_per_second": 56.321, "eval_steps_per_second": 7.041, "step": 1400 }, { "epoch": 0.31001765225066197, "grad_norm": 0.49646076560020447, "learning_rate": 9.702609044155303e-06, "loss": 0.2436, "step": 1405 }, { "epoch": 0.31112091791703445, "grad_norm": 0.48308032751083374, "learning_rate": 9.699502196327515e-06, "loss": 0.2517, "step": 1410 }, { "epoch": 0.31222418358340687, "grad_norm": 0.6409610509872437, "learning_rate": 9.69637970675523e-06, "loss": 0.2509, "step": 1415 }, { "epoch": 0.31332744924977934, "grad_norm": 0.5959620475769043, "learning_rate": 9.69324158583129e-06, "loss": 0.256, "step": 1420 }, { "epoch": 0.3144307149161518, "grad_norm": 0.5620144009590149, "learning_rate": 9.69008784400056e-06, "loss": 0.2569, "step": 1425 }, { "epoch": 0.3155339805825243, "grad_norm": 0.5051830410957336, "learning_rate": 9.686918491759904e-06, "loss": 0.2471, "step": 1430 }, { "epoch": 0.3166372462488967, "grad_norm": 0.49281784892082214, "learning_rate": 9.68373353965814e-06, "loss": 0.2301, "step": 1435 }, { "epoch": 0.3177405119152692, "grad_norm": 0.4283227324485779, "learning_rate": 9.68053299829601e-06, "loss": 0.2344, "step": 1440 }, { "epoch": 0.3188437775816417, "grad_norm": 0.4529547095298767, "learning_rate": 9.677316878326144e-06, "loss": 0.2557, "step": 1445 }, { "epoch": 0.3199470432480141, "grad_norm": 0.40247344970703125, "learning_rate": 9.67408519045302e-06, "loss": 0.2486, "step": 1450 }, { "epoch": 0.3210503089143866, "grad_norm": 0.43372419476509094, "learning_rate": 9.670837945432934e-06, "loss": 0.2453, "step": 1455 }, { "epoch": 0.32215357458075905, "grad_norm": 0.4570685625076294, "learning_rate": 9.667575154073962e-06, "loss": 0.2617, "step": 1460 }, { "epoch": 0.3232568402471315, "grad_norm": 0.5153756141662598, "learning_rate": 9.664296827235924e-06, "loss": 0.2564, "step": 1465 }, { "epoch": 0.32436010591350395, "grad_norm": 0.47910332679748535, "learning_rate": 9.66100297583035e-06, "loss": 0.2503, "step": 1470 }, { "epoch": 0.3254633715798764, "grad_norm": 0.4647476077079773, "learning_rate": 9.657693610820437e-06, "loss": 0.2367, "step": 1475 }, { "epoch": 0.3265666372462489, "grad_norm": 0.5447574257850647, "learning_rate": 9.654368743221022e-06, "loss": 0.2547, "step": 1480 }, { "epoch": 0.3276699029126214, "grad_norm": 0.493915319442749, "learning_rate": 9.651028384098538e-06, "loss": 0.2386, "step": 1485 }, { "epoch": 0.3287731685789938, "grad_norm": 0.4700816869735718, "learning_rate": 9.647672544570981e-06, "loss": 0.2537, "step": 1490 }, { "epoch": 0.3298764342453663, "grad_norm": 0.38883256912231445, "learning_rate": 9.644301235807872e-06, "loss": 0.233, "step": 1495 }, { "epoch": 0.33097969991173876, "grad_norm": 0.4903203547000885, "learning_rate": 9.640914469030216e-06, "loss": 0.2415, "step": 1500 }, { "epoch": 0.33097969991173876, "eval_loss": 0.2547125220298767, "eval_runtime": 274.3354, "eval_samples_per_second": 55.636, "eval_steps_per_second": 6.955, "step": 1500 }, { "epoch": 0.33208296557811123, "grad_norm": 0.4478644132614136, "learning_rate": 9.637512255510475e-06, "loss": 0.236, "step": 1505 }, { "epoch": 0.33318623124448365, "grad_norm": 0.44624054431915283, "learning_rate": 9.634094606572515e-06, "loss": 0.2526, "step": 1510 }, { "epoch": 0.33428949691085613, "grad_norm": 0.4568576514720917, "learning_rate": 9.630661533591584e-06, "loss": 0.2353, "step": 1515 }, { "epoch": 0.3353927625772286, "grad_norm": 0.427226722240448, "learning_rate": 9.627213047994265e-06, "loss": 0.2532, "step": 1520 }, { "epoch": 0.3364960282436011, "grad_norm": 0.4701986610889435, "learning_rate": 9.623749161258437e-06, "loss": 0.2349, "step": 1525 }, { "epoch": 0.3375992939099735, "grad_norm": 0.5643903017044067, "learning_rate": 9.620269884913247e-06, "loss": 0.259, "step": 1530 }, { "epoch": 0.338702559576346, "grad_norm": 0.49091801047325134, "learning_rate": 9.616775230539057e-06, "loss": 0.2512, "step": 1535 }, { "epoch": 0.33980582524271846, "grad_norm": 0.5190874338150024, "learning_rate": 9.613265209767417e-06, "loss": 0.245, "step": 1540 }, { "epoch": 0.3409090909090909, "grad_norm": 0.6141373515129089, "learning_rate": 9.609739834281023e-06, "loss": 0.2742, "step": 1545 }, { "epoch": 0.34201235657546336, "grad_norm": 0.5128368139266968, "learning_rate": 9.606199115813672e-06, "loss": 0.2559, "step": 1550 }, { "epoch": 0.34311562224183584, "grad_norm": 0.441245436668396, "learning_rate": 9.602643066150235e-06, "loss": 0.2515, "step": 1555 }, { "epoch": 0.3442188879082083, "grad_norm": 0.4743674397468567, "learning_rate": 9.599071697126608e-06, "loss": 0.2541, "step": 1560 }, { "epoch": 0.34532215357458074, "grad_norm": 0.5153236389160156, "learning_rate": 9.595485020629676e-06, "loss": 0.2578, "step": 1565 }, { "epoch": 0.3464254192409532, "grad_norm": 0.4311087131500244, "learning_rate": 9.591883048597273e-06, "loss": 0.2548, "step": 1570 }, { "epoch": 0.3475286849073257, "grad_norm": 0.494365930557251, "learning_rate": 9.588265793018141e-06, "loss": 0.256, "step": 1575 }, { "epoch": 0.34863195057369817, "grad_norm": 0.426740825176239, "learning_rate": 9.584633265931894e-06, "loss": 0.2547, "step": 1580 }, { "epoch": 0.3497352162400706, "grad_norm": 0.4335707426071167, "learning_rate": 9.580985479428975e-06, "loss": 0.241, "step": 1585 }, { "epoch": 0.35083848190644307, "grad_norm": 0.47340667247772217, "learning_rate": 9.577322445650616e-06, "loss": 0.2437, "step": 1590 }, { "epoch": 0.35194174757281554, "grad_norm": 0.48211535811424255, "learning_rate": 9.573644176788795e-06, "loss": 0.238, "step": 1595 }, { "epoch": 0.353045013239188, "grad_norm": 0.515032172203064, "learning_rate": 9.569950685086202e-06, "loss": 0.2646, "step": 1600 }, { "epoch": 0.353045013239188, "eval_loss": 0.2541050612926483, "eval_runtime": 272.7736, "eval_samples_per_second": 55.955, "eval_steps_per_second": 6.995, "step": 1600 }, { "epoch": 0.35414827890556044, "grad_norm": 0.4244433641433716, "learning_rate": 9.566241982836193e-06, "loss": 0.2487, "step": 1605 }, { "epoch": 0.3552515445719329, "grad_norm": 0.4410102367401123, "learning_rate": 9.562518082382751e-06, "loss": 0.2385, "step": 1610 }, { "epoch": 0.3563548102383054, "grad_norm": 0.5115966200828552, "learning_rate": 9.558778996120443e-06, "loss": 0.2484, "step": 1615 }, { "epoch": 0.3574580759046778, "grad_norm": 0.4943847954273224, "learning_rate": 9.555024736494382e-06, "loss": 0.2575, "step": 1620 }, { "epoch": 0.3585613415710503, "grad_norm": 0.4769156277179718, "learning_rate": 9.551255316000183e-06, "loss": 0.2432, "step": 1625 }, { "epoch": 0.3596646072374228, "grad_norm": 0.43486344814300537, "learning_rate": 9.54747074718392e-06, "loss": 0.2594, "step": 1630 }, { "epoch": 0.36076787290379525, "grad_norm": 0.45673149824142456, "learning_rate": 9.54367104264209e-06, "loss": 0.2513, "step": 1635 }, { "epoch": 0.36187113857016767, "grad_norm": 0.48159259557724, "learning_rate": 9.539856215021568e-06, "loss": 0.2467, "step": 1640 }, { "epoch": 0.36297440423654015, "grad_norm": 0.4502279460430145, "learning_rate": 9.536026277019562e-06, "loss": 0.2485, "step": 1645 }, { "epoch": 0.3640776699029126, "grad_norm": 0.5324723124504089, "learning_rate": 9.53218124138357e-06, "loss": 0.2417, "step": 1650 }, { "epoch": 0.3651809355692851, "grad_norm": 0.48323342204093933, "learning_rate": 9.528321120911345e-06, "loss": 0.253, "step": 1655 }, { "epoch": 0.3662842012356575, "grad_norm": 0.5192784667015076, "learning_rate": 9.524445928450851e-06, "loss": 0.2301, "step": 1660 }, { "epoch": 0.36738746690203, "grad_norm": 0.5197545886039734, "learning_rate": 9.520555676900214e-06, "loss": 0.2443, "step": 1665 }, { "epoch": 0.3684907325684025, "grad_norm": 0.45566871762275696, "learning_rate": 9.516650379207677e-06, "loss": 0.2447, "step": 1670 }, { "epoch": 0.36959399823477496, "grad_norm": 0.5340574383735657, "learning_rate": 9.51273004837157e-06, "loss": 0.2477, "step": 1675 }, { "epoch": 0.3706972639011474, "grad_norm": 0.4383482336997986, "learning_rate": 9.508794697440257e-06, "loss": 0.2335, "step": 1680 }, { "epoch": 0.37180052956751986, "grad_norm": 0.5311030745506287, "learning_rate": 9.504844339512096e-06, "loss": 0.2474, "step": 1685 }, { "epoch": 0.37290379523389233, "grad_norm": 0.5349487662315369, "learning_rate": 9.50087898773539e-06, "loss": 0.2625, "step": 1690 }, { "epoch": 0.3740070609002648, "grad_norm": 0.42293423414230347, "learning_rate": 9.49689865530835e-06, "loss": 0.2428, "step": 1695 }, { "epoch": 0.37511032656663723, "grad_norm": 0.4599260985851288, "learning_rate": 9.492903355479047e-06, "loss": 0.2497, "step": 1700 }, { "epoch": 0.37511032656663723, "eval_loss": 0.25350308418273926, "eval_runtime": 270.259, "eval_samples_per_second": 56.475, "eval_steps_per_second": 7.06, "step": 1700 }, { "epoch": 0.3762135922330097, "grad_norm": 0.46413329243659973, "learning_rate": 9.488893101545372e-06, "loss": 0.2409, "step": 1705 }, { "epoch": 0.3773168578993822, "grad_norm": 0.45214733481407166, "learning_rate": 9.484867906854986e-06, "loss": 0.2427, "step": 1710 }, { "epoch": 0.3784201235657546, "grad_norm": 0.49880728125572205, "learning_rate": 9.480827784805278e-06, "loss": 0.2404, "step": 1715 }, { "epoch": 0.3795233892321271, "grad_norm": 0.516257107257843, "learning_rate": 9.476772748843327e-06, "loss": 0.2531, "step": 1720 }, { "epoch": 0.38062665489849956, "grad_norm": 0.4441586434841156, "learning_rate": 9.472702812465843e-06, "loss": 0.2339, "step": 1725 }, { "epoch": 0.38172992056487204, "grad_norm": 0.4590930938720703, "learning_rate": 9.468617989219136e-06, "loss": 0.2465, "step": 1730 }, { "epoch": 0.38283318623124446, "grad_norm": 0.43926405906677246, "learning_rate": 9.46451829269906e-06, "loss": 0.2475, "step": 1735 }, { "epoch": 0.38393645189761694, "grad_norm": 0.4270091950893402, "learning_rate": 9.460403736550982e-06, "loss": 0.2404, "step": 1740 }, { "epoch": 0.3850397175639894, "grad_norm": 0.4161515235900879, "learning_rate": 9.45627433446972e-06, "loss": 0.2428, "step": 1745 }, { "epoch": 0.3861429832303619, "grad_norm": 0.4878949820995331, "learning_rate": 9.452130100199504e-06, "loss": 0.2636, "step": 1750 }, { "epoch": 0.3872462488967343, "grad_norm": 0.4900050759315491, "learning_rate": 9.447971047533936e-06, "loss": 0.2415, "step": 1755 }, { "epoch": 0.3883495145631068, "grad_norm": 0.43371209502220154, "learning_rate": 9.443797190315938e-06, "loss": 0.2469, "step": 1760 }, { "epoch": 0.38945278022947927, "grad_norm": 0.43596795201301575, "learning_rate": 9.439608542437704e-06, "loss": 0.2394, "step": 1765 }, { "epoch": 0.39055604589585174, "grad_norm": 0.4555245637893677, "learning_rate": 9.435405117840662e-06, "loss": 0.2435, "step": 1770 }, { "epoch": 0.39165931156222417, "grad_norm": 0.46150678396224976, "learning_rate": 9.431186930515419e-06, "loss": 0.2585, "step": 1775 }, { "epoch": 0.39276257722859664, "grad_norm": 0.42505866289138794, "learning_rate": 9.42695399450172e-06, "loss": 0.2386, "step": 1780 }, { "epoch": 0.3938658428949691, "grad_norm": 0.49516651034355164, "learning_rate": 9.422706323888398e-06, "loss": 0.235, "step": 1785 }, { "epoch": 0.3949691085613416, "grad_norm": 0.48143908381462097, "learning_rate": 9.418443932813328e-06, "loss": 0.2495, "step": 1790 }, { "epoch": 0.396072374227714, "grad_norm": 0.5001795887947083, "learning_rate": 9.414166835463383e-06, "loss": 0.247, "step": 1795 }, { "epoch": 0.3971756398940865, "grad_norm": 0.47970953583717346, "learning_rate": 9.409875046074379e-06, "loss": 0.2486, "step": 1800 }, { "epoch": 0.3971756398940865, "eval_loss": 0.2526043653488159, "eval_runtime": 269.6648, "eval_samples_per_second": 56.6, "eval_steps_per_second": 7.075, "step": 1800 }, { "epoch": 0.398278905560459, "grad_norm": 0.5398975610733032, "learning_rate": 9.405568578931042e-06, "loss": 0.257, "step": 1805 }, { "epoch": 0.3993821712268314, "grad_norm": 0.4145001769065857, "learning_rate": 9.401247448366937e-06, "loss": 0.2305, "step": 1810 }, { "epoch": 0.40048543689320387, "grad_norm": 0.49223729968070984, "learning_rate": 9.39691166876445e-06, "loss": 0.2385, "step": 1815 }, { "epoch": 0.40158870255957635, "grad_norm": 0.5020371675491333, "learning_rate": 9.392561254554712e-06, "loss": 0.2507, "step": 1820 }, { "epoch": 0.4026919682259488, "grad_norm": 0.4438912868499756, "learning_rate": 9.388196220217574e-06, "loss": 0.2442, "step": 1825 }, { "epoch": 0.40379523389232125, "grad_norm": 0.5784342288970947, "learning_rate": 9.383816580281539e-06, "loss": 0.2434, "step": 1830 }, { "epoch": 0.4048984995586937, "grad_norm": 0.4573621451854706, "learning_rate": 9.379422349323728e-06, "loss": 0.2348, "step": 1835 }, { "epoch": 0.4060017652250662, "grad_norm": 0.5133495926856995, "learning_rate": 9.375013541969828e-06, "loss": 0.2474, "step": 1840 }, { "epoch": 0.4071050308914387, "grad_norm": 0.5082767605781555, "learning_rate": 9.370590172894037e-06, "loss": 0.2424, "step": 1845 }, { "epoch": 0.4082082965578111, "grad_norm": 0.41318631172180176, "learning_rate": 9.366152256819025e-06, "loss": 0.2459, "step": 1850 }, { "epoch": 0.4093115622241836, "grad_norm": 0.48783794045448303, "learning_rate": 9.361699808515877e-06, "loss": 0.2332, "step": 1855 }, { "epoch": 0.41041482789055606, "grad_norm": 0.46912387013435364, "learning_rate": 9.357232842804045e-06, "loss": 0.2362, "step": 1860 }, { "epoch": 0.41151809355692853, "grad_norm": 0.5062457323074341, "learning_rate": 9.352751374551305e-06, "loss": 0.2479, "step": 1865 }, { "epoch": 0.41262135922330095, "grad_norm": 0.45189908146858215, "learning_rate": 9.348255418673702e-06, "loss": 0.2597, "step": 1870 }, { "epoch": 0.41372462488967343, "grad_norm": 0.43714070320129395, "learning_rate": 9.3437449901355e-06, "loss": 0.2447, "step": 1875 }, { "epoch": 0.4148278905560459, "grad_norm": 0.44575101137161255, "learning_rate": 9.339220103949132e-06, "loss": 0.2572, "step": 1880 }, { "epoch": 0.4159311562224184, "grad_norm": 0.4869813024997711, "learning_rate": 9.334680775175154e-06, "loss": 0.2469, "step": 1885 }, { "epoch": 0.4170344218887908, "grad_norm": 0.4805983901023865, "learning_rate": 9.330127018922195e-06, "loss": 0.2427, "step": 1890 }, { "epoch": 0.4181376875551633, "grad_norm": 0.47126686573028564, "learning_rate": 9.325558850346897e-06, "loss": 0.2448, "step": 1895 }, { "epoch": 0.41924095322153576, "grad_norm": 0.5163640975952148, "learning_rate": 9.320976284653877e-06, "loss": 0.2289, "step": 1900 }, { "epoch": 0.41924095322153576, "eval_loss": 0.25212275981903076, "eval_runtime": 270.6499, "eval_samples_per_second": 56.394, "eval_steps_per_second": 7.05, "step": 1900 }, { "epoch": 0.4203442188879082, "grad_norm": 0.46562832593917847, "learning_rate": 9.316379337095671e-06, "loss": 0.255, "step": 1905 }, { "epoch": 0.42144748455428066, "grad_norm": 0.3981192708015442, "learning_rate": 9.311768022972682e-06, "loss": 0.2455, "step": 1910 }, { "epoch": 0.42255075022065314, "grad_norm": 0.4480000138282776, "learning_rate": 9.307142357633132e-06, "loss": 0.2437, "step": 1915 }, { "epoch": 0.4236540158870256, "grad_norm": 0.4353036880493164, "learning_rate": 9.302502356473006e-06, "loss": 0.2435, "step": 1920 }, { "epoch": 0.42475728155339804, "grad_norm": 0.42388132214546204, "learning_rate": 9.297848034936007e-06, "loss": 0.2458, "step": 1925 }, { "epoch": 0.4258605472197705, "grad_norm": 0.5140712261199951, "learning_rate": 9.293179408513501e-06, "loss": 0.2469, "step": 1930 }, { "epoch": 0.426963812886143, "grad_norm": 0.5060368180274963, "learning_rate": 9.288496492744466e-06, "loss": 0.2499, "step": 1935 }, { "epoch": 0.42806707855251547, "grad_norm": 0.43134334683418274, "learning_rate": 9.283799303215442e-06, "loss": 0.233, "step": 1940 }, { "epoch": 0.4291703442188879, "grad_norm": 0.48315203189849854, "learning_rate": 9.279087855560474e-06, "loss": 0.2457, "step": 1945 }, { "epoch": 0.43027360988526037, "grad_norm": 0.4424877166748047, "learning_rate": 9.274362165461064e-06, "loss": 0.2402, "step": 1950 }, { "epoch": 0.43137687555163284, "grad_norm": 0.47104790806770325, "learning_rate": 9.269622248646124e-06, "loss": 0.2419, "step": 1955 }, { "epoch": 0.4324801412180053, "grad_norm": 0.4866120517253876, "learning_rate": 9.264868120891913e-06, "loss": 0.2428, "step": 1960 }, { "epoch": 0.43358340688437774, "grad_norm": 0.47548824548721313, "learning_rate": 9.260099798021988e-06, "loss": 0.2355, "step": 1965 }, { "epoch": 0.4346866725507502, "grad_norm": 0.4570111930370331, "learning_rate": 9.255317295907158e-06, "loss": 0.2509, "step": 1970 }, { "epoch": 0.4357899382171227, "grad_norm": 0.5114912986755371, "learning_rate": 9.250520630465419e-06, "loss": 0.2409, "step": 1975 }, { "epoch": 0.4368932038834951, "grad_norm": 0.38849082589149475, "learning_rate": 9.245709817661917e-06, "loss": 0.2413, "step": 1980 }, { "epoch": 0.4379964695498676, "grad_norm": 0.5250911712646484, "learning_rate": 9.240884873508876e-06, "loss": 0.2416, "step": 1985 }, { "epoch": 0.4390997352162401, "grad_norm": 0.43927446007728577, "learning_rate": 9.236045814065563e-06, "loss": 0.2399, "step": 1990 }, { "epoch": 0.44020300088261255, "grad_norm": 0.5229560136795044, "learning_rate": 9.231192655438222e-06, "loss": 0.2536, "step": 1995 }, { "epoch": 0.44130626654898497, "grad_norm": 0.5083780884742737, "learning_rate": 9.226325413780021e-06, "loss": 0.2324, "step": 2000 }, { "epoch": 0.44130626654898497, "eval_loss": 0.25146690011024475, "eval_runtime": 270.7254, "eval_samples_per_second": 56.378, "eval_steps_per_second": 7.048, "step": 2000 }, { "epoch": 0.44240953221535745, "grad_norm": 0.4707069396972656, "learning_rate": 9.221444105291013e-06, "loss": 0.2594, "step": 2005 }, { "epoch": 0.4435127978817299, "grad_norm": 0.4856661260128021, "learning_rate": 9.216548746218056e-06, "loss": 0.2493, "step": 2010 }, { "epoch": 0.4446160635481024, "grad_norm": 0.4883829951286316, "learning_rate": 9.211639352854786e-06, "loss": 0.2468, "step": 2015 }, { "epoch": 0.4457193292144748, "grad_norm": 0.4665009379386902, "learning_rate": 9.206715941541547e-06, "loss": 0.2519, "step": 2020 }, { "epoch": 0.4468225948808473, "grad_norm": 0.45250222086906433, "learning_rate": 9.201778528665333e-06, "loss": 0.2436, "step": 2025 }, { "epoch": 0.4479258605472198, "grad_norm": 0.457640677690506, "learning_rate": 9.196827130659752e-06, "loss": 0.2575, "step": 2030 }, { "epoch": 0.44902912621359226, "grad_norm": 0.3947480320930481, "learning_rate": 9.19186176400495e-06, "loss": 0.2521, "step": 2035 }, { "epoch": 0.4501323918799647, "grad_norm": 0.5039179921150208, "learning_rate": 9.186882445227572e-06, "loss": 0.2464, "step": 2040 }, { "epoch": 0.45123565754633715, "grad_norm": 0.46842432022094727, "learning_rate": 9.181889190900702e-06, "loss": 0.2603, "step": 2045 }, { "epoch": 0.45233892321270963, "grad_norm": 0.40255504846572876, "learning_rate": 9.1768820176438e-06, "loss": 0.242, "step": 2050 }, { "epoch": 0.4534421888790821, "grad_norm": 0.4471176564693451, "learning_rate": 9.17186094212266e-06, "loss": 0.2349, "step": 2055 }, { "epoch": 0.45454545454545453, "grad_norm": 0.4603224992752075, "learning_rate": 9.166825981049345e-06, "loss": 0.2449, "step": 2060 }, { "epoch": 0.455648720211827, "grad_norm": 0.4398849308490753, "learning_rate": 9.161777151182137e-06, "loss": 0.2346, "step": 2065 }, { "epoch": 0.4567519858781995, "grad_norm": 0.54154372215271, "learning_rate": 9.156714469325474e-06, "loss": 0.2469, "step": 2070 }, { "epoch": 0.4578552515445719, "grad_norm": 0.5123459696769714, "learning_rate": 9.151637952329903e-06, "loss": 0.253, "step": 2075 }, { "epoch": 0.4589585172109444, "grad_norm": 0.40907183289527893, "learning_rate": 9.14654761709202e-06, "loss": 0.2467, "step": 2080 }, { "epoch": 0.46006178287731686, "grad_norm": 0.47456130385398865, "learning_rate": 9.141443480554408e-06, "loss": 0.2492, "step": 2085 }, { "epoch": 0.46116504854368934, "grad_norm": 0.4863825738430023, "learning_rate": 9.136325559705593e-06, "loss": 0.2416, "step": 2090 }, { "epoch": 0.46226831421006176, "grad_norm": 0.41943055391311646, "learning_rate": 9.131193871579975e-06, "loss": 0.2352, "step": 2095 }, { "epoch": 0.46337157987643424, "grad_norm": 0.49511662125587463, "learning_rate": 9.12604843325778e-06, "loss": 0.2425, "step": 2100 }, { "epoch": 0.46337157987643424, "eval_loss": 0.25082722306251526, "eval_runtime": 268.229, "eval_samples_per_second": 56.903, "eval_steps_per_second": 7.113, "step": 2100 }, { "epoch": 0.4644748455428067, "grad_norm": 0.4573848247528076, "learning_rate": 9.120889261864999e-06, "loss": 0.2586, "step": 2105 }, { "epoch": 0.4655781112091792, "grad_norm": 0.48330816626548767, "learning_rate": 9.11571637457333e-06, "loss": 0.2471, "step": 2110 }, { "epoch": 0.4666813768755516, "grad_norm": 0.5316815376281738, "learning_rate": 9.110529788600127e-06, "loss": 0.2398, "step": 2115 }, { "epoch": 0.4677846425419241, "grad_norm": 0.45236679911613464, "learning_rate": 9.105329521208334e-06, "loss": 0.2471, "step": 2120 }, { "epoch": 0.46888790820829657, "grad_norm": 0.4722297787666321, "learning_rate": 9.100115589706436e-06, "loss": 0.2428, "step": 2125 }, { "epoch": 0.46999117387466904, "grad_norm": 0.4543675482273102, "learning_rate": 9.094888011448391e-06, "loss": 0.2516, "step": 2130 }, { "epoch": 0.47109443954104147, "grad_norm": 0.4152880907058716, "learning_rate": 9.089646803833589e-06, "loss": 0.225, "step": 2135 }, { "epoch": 0.47219770520741394, "grad_norm": 0.44709253311157227, "learning_rate": 9.084391984306775e-06, "loss": 0.2456, "step": 2140 }, { "epoch": 0.4733009708737864, "grad_norm": 0.5279027819633484, "learning_rate": 9.079123570358e-06, "loss": 0.2415, "step": 2145 }, { "epoch": 0.4744042365401589, "grad_norm": 0.4792356491088867, "learning_rate": 9.073841579522571e-06, "loss": 0.2543, "step": 2150 }, { "epoch": 0.4755075022065313, "grad_norm": 0.45700347423553467, "learning_rate": 9.068546029380971e-06, "loss": 0.2593, "step": 2155 }, { "epoch": 0.4766107678729038, "grad_norm": 0.5032837986946106, "learning_rate": 9.063236937558826e-06, "loss": 0.2528, "step": 2160 }, { "epoch": 0.4777140335392763, "grad_norm": 0.48134273290634155, "learning_rate": 9.057914321726824e-06, "loss": 0.2553, "step": 2165 }, { "epoch": 0.4788172992056487, "grad_norm": 0.45645344257354736, "learning_rate": 9.052578199600675e-06, "loss": 0.2387, "step": 2170 }, { "epoch": 0.47992056487202117, "grad_norm": 0.4026988744735718, "learning_rate": 9.047228588941034e-06, "loss": 0.228, "step": 2175 }, { "epoch": 0.48102383053839365, "grad_norm": 0.4304678440093994, "learning_rate": 9.041865507553458e-06, "loss": 0.2513, "step": 2180 }, { "epoch": 0.4821270962047661, "grad_norm": 0.4108814299106598, "learning_rate": 9.036488973288339e-06, "loss": 0.238, "step": 2185 }, { "epoch": 0.48323036187113855, "grad_norm": 0.4733142852783203, "learning_rate": 9.031099004040841e-06, "loss": 0.2506, "step": 2190 }, { "epoch": 0.484333627537511, "grad_norm": 0.4324648380279541, "learning_rate": 9.025695617750848e-06, "loss": 0.2243, "step": 2195 }, { "epoch": 0.4854368932038835, "grad_norm": 0.4637969136238098, "learning_rate": 9.020278832402902e-06, "loss": 0.2545, "step": 2200 }, { "epoch": 0.4854368932038835, "eval_loss": 0.25019994378089905, "eval_runtime": 271.9518, "eval_samples_per_second": 56.124, "eval_steps_per_second": 7.016, "step": 2200 }, { "epoch": 0.486540158870256, "grad_norm": 0.4881959855556488, "learning_rate": 9.014848666026138e-06, "loss": 0.2467, "step": 2205 }, { "epoch": 0.4876434245366284, "grad_norm": 0.44622328877449036, "learning_rate": 9.009405136694234e-06, "loss": 0.2512, "step": 2210 }, { "epoch": 0.4887466902030009, "grad_norm": 0.4718511700630188, "learning_rate": 9.003948262525341e-06, "loss": 0.247, "step": 2215 }, { "epoch": 0.48984995586937335, "grad_norm": 0.4678577780723572, "learning_rate": 8.998478061682025e-06, "loss": 0.2301, "step": 2220 }, { "epoch": 0.49095322153574583, "grad_norm": 0.44579043984413147, "learning_rate": 8.992994552371217e-06, "loss": 0.2513, "step": 2225 }, { "epoch": 0.49205648720211825, "grad_norm": 0.4663316309452057, "learning_rate": 8.987497752844132e-06, "loss": 0.2568, "step": 2230 }, { "epoch": 0.49315975286849073, "grad_norm": 0.5495996475219727, "learning_rate": 8.981987681396226e-06, "loss": 0.2626, "step": 2235 }, { "epoch": 0.4942630185348632, "grad_norm": 0.4634556174278259, "learning_rate": 8.976464356367133e-06, "loss": 0.2523, "step": 2240 }, { "epoch": 0.4953662842012357, "grad_norm": 0.4804018437862396, "learning_rate": 8.970927796140592e-06, "loss": 0.2323, "step": 2245 }, { "epoch": 0.4964695498676081, "grad_norm": 0.4309288561344147, "learning_rate": 8.965378019144397e-06, "loss": 0.2432, "step": 2250 }, { "epoch": 0.4975728155339806, "grad_norm": 0.4046125113964081, "learning_rate": 8.959815043850336e-06, "loss": 0.228, "step": 2255 }, { "epoch": 0.49867608120035306, "grad_norm": 0.4361225366592407, "learning_rate": 8.95423888877412e-06, "loss": 0.2398, "step": 2260 }, { "epoch": 0.4997793468667255, "grad_norm": 0.4338124096393585, "learning_rate": 8.948649572475332e-06, "loss": 0.2471, "step": 2265 }, { "epoch": 0.500882612533098, "grad_norm": 0.4460661709308624, "learning_rate": 8.943047113557358e-06, "loss": 0.2525, "step": 2270 }, { "epoch": 0.5019858781994704, "grad_norm": 0.43851226568222046, "learning_rate": 8.937431530667329e-06, "loss": 0.2412, "step": 2275 }, { "epoch": 0.5030891438658429, "grad_norm": 0.4404292702674866, "learning_rate": 8.931802842496056e-06, "loss": 0.2467, "step": 2280 }, { "epoch": 0.5041924095322153, "grad_norm": 0.5048953890800476, "learning_rate": 8.926161067777973e-06, "loss": 0.2503, "step": 2285 }, { "epoch": 0.5052956751985879, "grad_norm": 0.48402130603790283, "learning_rate": 8.920506225291067e-06, "loss": 0.2441, "step": 2290 }, { "epoch": 0.5063989408649603, "grad_norm": 0.409432977437973, "learning_rate": 8.914838333856822e-06, "loss": 0.2388, "step": 2295 }, { "epoch": 0.5075022065313327, "grad_norm": 0.41323322057724, "learning_rate": 8.90915741234015e-06, "loss": 0.2372, "step": 2300 }, { "epoch": 0.5075022065313327, "eval_loss": 0.24960678815841675, "eval_runtime": 272.2362, "eval_samples_per_second": 56.065, "eval_steps_per_second": 7.009, "step": 2300 }, { "epoch": 0.5086054721977052, "grad_norm": 0.5443533062934875, "learning_rate": 8.90346347964934e-06, "loss": 0.2311, "step": 2305 }, { "epoch": 0.5097087378640777, "grad_norm": 0.42860737442970276, "learning_rate": 8.897756554735976e-06, "loss": 0.2537, "step": 2310 }, { "epoch": 0.5108120035304501, "grad_norm": 0.4304381012916565, "learning_rate": 8.892036656594898e-06, "loss": 0.2366, "step": 2315 }, { "epoch": 0.5119152691968226, "grad_norm": 0.5057708024978638, "learning_rate": 8.886303804264117e-06, "loss": 0.2362, "step": 2320 }, { "epoch": 0.513018534863195, "grad_norm": 0.48017001152038574, "learning_rate": 8.88055801682476e-06, "loss": 0.2493, "step": 2325 }, { "epoch": 0.5141218005295676, "grad_norm": 0.441488653421402, "learning_rate": 8.874799313401014e-06, "loss": 0.2413, "step": 2330 }, { "epoch": 0.51522506619594, "grad_norm": 0.5098276138305664, "learning_rate": 8.86902771316005e-06, "loss": 0.2496, "step": 2335 }, { "epoch": 0.5163283318623124, "grad_norm": 0.43526649475097656, "learning_rate": 8.863243235311964e-06, "loss": 0.2452, "step": 2340 }, { "epoch": 0.517431597528685, "grad_norm": 0.48061615228652954, "learning_rate": 8.857445899109716e-06, "loss": 0.2521, "step": 2345 }, { "epoch": 0.5185348631950574, "grad_norm": 0.425627201795578, "learning_rate": 8.851635723849062e-06, "loss": 0.251, "step": 2350 }, { "epoch": 0.5196381288614298, "grad_norm": 0.39612120389938354, "learning_rate": 8.845812728868496e-06, "loss": 0.2366, "step": 2355 }, { "epoch": 0.5207413945278023, "grad_norm": 0.43580201268196106, "learning_rate": 8.839976933549173e-06, "loss": 0.2501, "step": 2360 }, { "epoch": 0.5218446601941747, "grad_norm": 0.3925994038581848, "learning_rate": 8.834128357314856e-06, "loss": 0.2356, "step": 2365 }, { "epoch": 0.5229479258605472, "grad_norm": 0.4675627648830414, "learning_rate": 8.828267019631852e-06, "loss": 0.2439, "step": 2370 }, { "epoch": 0.5240511915269197, "grad_norm": 0.5115921497344971, "learning_rate": 8.822392940008937e-06, "loss": 0.2434, "step": 2375 }, { "epoch": 0.5251544571932921, "grad_norm": 0.5380107760429382, "learning_rate": 8.8165061379973e-06, "loss": 0.2476, "step": 2380 }, { "epoch": 0.5262577228596647, "grad_norm": 0.541187047958374, "learning_rate": 8.810606633190475e-06, "loss": 0.2397, "step": 2385 }, { "epoch": 0.5273609885260371, "grad_norm": 0.49486243724823, "learning_rate": 8.804694445224274e-06, "loss": 0.2548, "step": 2390 }, { "epoch": 0.5284642541924095, "grad_norm": 0.5872311592102051, "learning_rate": 8.798769593776723e-06, "loss": 0.239, "step": 2395 }, { "epoch": 0.529567519858782, "grad_norm": 0.48262667655944824, "learning_rate": 8.792832098568002e-06, "loss": 0.2328, "step": 2400 }, { "epoch": 0.529567519858782, "eval_loss": 0.24928364157676697, "eval_runtime": 271.3099, "eval_samples_per_second": 56.257, "eval_steps_per_second": 7.033, "step": 2400 }, { "epoch": 0.5306707855251545, "grad_norm": 0.40170180797576904, "learning_rate": 8.786881979360368e-06, "loss": 0.2564, "step": 2405 }, { "epoch": 0.5317740511915269, "grad_norm": 0.44170036911964417, "learning_rate": 8.7809192559581e-06, "loss": 0.2413, "step": 2410 }, { "epoch": 0.5328773168578994, "grad_norm": 0.4831240177154541, "learning_rate": 8.774943948207427e-06, "loss": 0.2391, "step": 2415 }, { "epoch": 0.5339805825242718, "grad_norm": 0.39944949746131897, "learning_rate": 8.76895607599646e-06, "loss": 0.2361, "step": 2420 }, { "epoch": 0.5350838481906444, "grad_norm": 0.4743267595767975, "learning_rate": 8.762955659255137e-06, "loss": 0.2516, "step": 2425 }, { "epoch": 0.5361871138570168, "grad_norm": 0.4756656289100647, "learning_rate": 8.756942717955142e-06, "loss": 0.2565, "step": 2430 }, { "epoch": 0.5372903795233892, "grad_norm": 0.45802560448646545, "learning_rate": 8.750917272109849e-06, "loss": 0.2386, "step": 2435 }, { "epoch": 0.5383936451897617, "grad_norm": 0.45499125123023987, "learning_rate": 8.744879341774251e-06, "loss": 0.2397, "step": 2440 }, { "epoch": 0.5394969108561342, "grad_norm": 0.3336021900177002, "learning_rate": 8.738828947044895e-06, "loss": 0.236, "step": 2445 }, { "epoch": 0.5406001765225066, "grad_norm": 0.4355071485042572, "learning_rate": 8.732766108059814e-06, "loss": 0.2363, "step": 2450 }, { "epoch": 0.5417034421888791, "grad_norm": 0.4942583441734314, "learning_rate": 8.726690844998457e-06, "loss": 0.2301, "step": 2455 }, { "epoch": 0.5428067078552515, "grad_norm": 0.4470270574092865, "learning_rate": 8.720603178081632e-06, "loss": 0.2357, "step": 2460 }, { "epoch": 0.543909973521624, "grad_norm": 0.4445681571960449, "learning_rate": 8.714503127571425e-06, "loss": 0.2558, "step": 2465 }, { "epoch": 0.5450132391879965, "grad_norm": 0.5611670613288879, "learning_rate": 8.708390713771145e-06, "loss": 0.2444, "step": 2470 }, { "epoch": 0.5461165048543689, "grad_norm": 0.37664106488227844, "learning_rate": 8.702265957025241e-06, "loss": 0.2511, "step": 2475 }, { "epoch": 0.5472197705207414, "grad_norm": 0.5034042596817017, "learning_rate": 8.696128877719258e-06, "loss": 0.2483, "step": 2480 }, { "epoch": 0.5483230361871139, "grad_norm": 0.4550327956676483, "learning_rate": 8.689979496279747e-06, "loss": 0.2404, "step": 2485 }, { "epoch": 0.5494263018534863, "grad_norm": 0.4192756712436676, "learning_rate": 8.683817833174204e-06, "loss": 0.2272, "step": 2490 }, { "epoch": 0.5505295675198588, "grad_norm": 0.49941694736480713, "learning_rate": 8.677643908911007e-06, "loss": 0.2461, "step": 2495 }, { "epoch": 0.5516328331862312, "grad_norm": 0.48723432421684265, "learning_rate": 8.67145774403934e-06, "loss": 0.2359, "step": 2500 }, { "epoch": 0.5516328331862312, "eval_loss": 0.24847546219825745, "eval_runtime": 268.7693, "eval_samples_per_second": 56.788, "eval_steps_per_second": 7.099, "step": 2500 }, { "epoch": 0.5527360988526037, "grad_norm": 0.4447115957736969, "learning_rate": 8.665259359149132e-06, "loss": 0.244, "step": 2505 }, { "epoch": 0.5538393645189762, "grad_norm": 0.46144431829452515, "learning_rate": 8.659048774870986e-06, "loss": 0.2509, "step": 2510 }, { "epoch": 0.5549426301853486, "grad_norm": 0.41772812604904175, "learning_rate": 8.652826011876104e-06, "loss": 0.2422, "step": 2515 }, { "epoch": 0.556045895851721, "grad_norm": 0.45326176285743713, "learning_rate": 8.646591090876225e-06, "loss": 0.241, "step": 2520 }, { "epoch": 0.5571491615180936, "grad_norm": 0.4441646337509155, "learning_rate": 8.64034403262356e-06, "loss": 0.2445, "step": 2525 }, { "epoch": 0.558252427184466, "grad_norm": 0.5038093328475952, "learning_rate": 8.634084857910709e-06, "loss": 0.2478, "step": 2530 }, { "epoch": 0.5593556928508385, "grad_norm": 0.4078108072280884, "learning_rate": 8.627813587570609e-06, "loss": 0.255, "step": 2535 }, { "epoch": 0.560458958517211, "grad_norm": 0.4333765506744385, "learning_rate": 8.621530242476446e-06, "loss": 0.2438, "step": 2540 }, { "epoch": 0.5615622241835834, "grad_norm": 0.5008811354637146, "learning_rate": 8.615234843541606e-06, "loss": 0.2388, "step": 2545 }, { "epoch": 0.5626654898499559, "grad_norm": 0.3749838173389435, "learning_rate": 8.608927411719585e-06, "loss": 0.2422, "step": 2550 }, { "epoch": 0.5637687555163283, "grad_norm": 0.5079739093780518, "learning_rate": 8.602607968003935e-06, "loss": 0.2367, "step": 2555 }, { "epoch": 0.5648720211827007, "grad_norm": 0.40866804122924805, "learning_rate": 8.59627653342819e-06, "loss": 0.2598, "step": 2560 }, { "epoch": 0.5659752868490733, "grad_norm": 0.3951939642429352, "learning_rate": 8.589933129065786e-06, "loss": 0.2316, "step": 2565 }, { "epoch": 0.5670785525154457, "grad_norm": 0.41789600253105164, "learning_rate": 8.583577776030005e-06, "loss": 0.2412, "step": 2570 }, { "epoch": 0.5681818181818182, "grad_norm": 0.5892974138259888, "learning_rate": 8.5772104954739e-06, "loss": 0.2441, "step": 2575 }, { "epoch": 0.5692850838481907, "grad_norm": 0.46684080362319946, "learning_rate": 8.570831308590219e-06, "loss": 0.2437, "step": 2580 }, { "epoch": 0.5703883495145631, "grad_norm": 0.5170934796333313, "learning_rate": 8.564440236611344e-06, "loss": 0.2436, "step": 2585 }, { "epoch": 0.5714916151809356, "grad_norm": 0.5239847302436829, "learning_rate": 8.558037300809209e-06, "loss": 0.2458, "step": 2590 }, { "epoch": 0.572594880847308, "grad_norm": 0.4109562933444977, "learning_rate": 8.551622522495238e-06, "loss": 0.2492, "step": 2595 }, { "epoch": 0.5736981465136805, "grad_norm": 0.40857604146003723, "learning_rate": 8.545195923020273e-06, "loss": 0.24, "step": 2600 }, { "epoch": 0.5736981465136805, "eval_loss": 0.2479788064956665, "eval_runtime": 270.4896, "eval_samples_per_second": 56.427, "eval_steps_per_second": 7.054, "step": 2600 }, { "epoch": 0.574801412180053, "grad_norm": 0.47509685158729553, "learning_rate": 8.538757523774503e-06, "loss": 0.2835, "step": 2605 }, { "epoch": 0.5759046778464254, "grad_norm": 0.47140100598335266, "learning_rate": 8.532307346187384e-06, "loss": 0.2372, "step": 2610 }, { "epoch": 0.5770079435127978, "grad_norm": 0.4311586618423462, "learning_rate": 8.525845411727581e-06, "loss": 0.2446, "step": 2615 }, { "epoch": 0.5781112091791704, "grad_norm": 0.44329634308815, "learning_rate": 8.519371741902888e-06, "loss": 0.2419, "step": 2620 }, { "epoch": 0.5792144748455428, "grad_norm": 0.4547870457172394, "learning_rate": 8.512886358260162e-06, "loss": 0.2398, "step": 2625 }, { "epoch": 0.5803177405119153, "grad_norm": 0.47059762477874756, "learning_rate": 8.506389282385242e-06, "loss": 0.2512, "step": 2630 }, { "epoch": 0.5814210061782877, "grad_norm": 0.49984219670295715, "learning_rate": 8.499880535902885e-06, "loss": 0.2575, "step": 2635 }, { "epoch": 0.5825242718446602, "grad_norm": 0.4017459452152252, "learning_rate": 8.493360140476699e-06, "loss": 0.2352, "step": 2640 }, { "epoch": 0.5836275375110327, "grad_norm": 0.3862455487251282, "learning_rate": 8.486828117809057e-06, "loss": 0.2317, "step": 2645 }, { "epoch": 0.5847308031774051, "grad_norm": 0.4500201642513275, "learning_rate": 8.480284489641034e-06, "loss": 0.2385, "step": 2650 }, { "epoch": 0.5858340688437775, "grad_norm": 0.5192285776138306, "learning_rate": 8.473729277752331e-06, "loss": 0.2426, "step": 2655 }, { "epoch": 0.5869373345101501, "grad_norm": 0.41023924946784973, "learning_rate": 8.467162503961209e-06, "loss": 0.2346, "step": 2660 }, { "epoch": 0.5880406001765225, "grad_norm": 0.44286391139030457, "learning_rate": 8.460584190124405e-06, "loss": 0.246, "step": 2665 }, { "epoch": 0.589143865842895, "grad_norm": 0.45593389868736267, "learning_rate": 8.45399435813707e-06, "loss": 0.2371, "step": 2670 }, { "epoch": 0.5902471315092674, "grad_norm": 0.4817209839820862, "learning_rate": 8.447393029932692e-06, "loss": 0.2376, "step": 2675 }, { "epoch": 0.5913503971756399, "grad_norm": 0.4856320917606354, "learning_rate": 8.440780227483016e-06, "loss": 0.2451, "step": 2680 }, { "epoch": 0.5924536628420124, "grad_norm": 0.42935821413993835, "learning_rate": 8.43415597279799e-06, "loss": 0.2422, "step": 2685 }, { "epoch": 0.5935569285083848, "grad_norm": 0.45911160111427307, "learning_rate": 8.427520287925669e-06, "loss": 0.2397, "step": 2690 }, { "epoch": 0.5946601941747572, "grad_norm": 0.4541209638118744, "learning_rate": 8.420873194952153e-06, "loss": 0.2392, "step": 2695 }, { "epoch": 0.5957634598411298, "grad_norm": 0.4879801273345947, "learning_rate": 8.414214716001519e-06, "loss": 0.2479, "step": 2700 }, { "epoch": 0.5957634598411298, "eval_loss": 0.24747207760810852, "eval_runtime": 270.41, "eval_samples_per_second": 56.444, "eval_steps_per_second": 7.056, "step": 2700 }, { "epoch": 0.5968667255075022, "grad_norm": 0.4193342328071594, "learning_rate": 8.407544873235736e-06, "loss": 0.2482, "step": 2705 }, { "epoch": 0.5979699911738746, "grad_norm": 0.45270466804504395, "learning_rate": 8.400863688854598e-06, "loss": 0.2469, "step": 2710 }, { "epoch": 0.5990732568402471, "grad_norm": 0.4806990623474121, "learning_rate": 8.394171185095646e-06, "loss": 0.2442, "step": 2715 }, { "epoch": 0.6001765225066196, "grad_norm": 0.4336373209953308, "learning_rate": 8.387467384234096e-06, "loss": 0.2335, "step": 2720 }, { "epoch": 0.6012797881729921, "grad_norm": 0.4600653648376465, "learning_rate": 8.38075230858277e-06, "loss": 0.2365, "step": 2725 }, { "epoch": 0.6023830538393645, "grad_norm": 0.5015227198600769, "learning_rate": 8.37402598049201e-06, "loss": 0.2502, "step": 2730 }, { "epoch": 0.603486319505737, "grad_norm": 0.552075207233429, "learning_rate": 8.367288422349617e-06, "loss": 0.2403, "step": 2735 }, { "epoch": 0.6045895851721095, "grad_norm": 0.4628050923347473, "learning_rate": 8.360539656580768e-06, "loss": 0.2294, "step": 2740 }, { "epoch": 0.6056928508384819, "grad_norm": 0.48730120062828064, "learning_rate": 8.353779705647936e-06, "loss": 0.2397, "step": 2745 }, { "epoch": 0.6067961165048543, "grad_norm": 0.40887823700904846, "learning_rate": 8.347008592050834e-06, "loss": 0.2491, "step": 2750 }, { "epoch": 0.6078993821712269, "grad_norm": 0.4346201419830322, "learning_rate": 8.340226338326321e-06, "loss": 0.2436, "step": 2755 }, { "epoch": 0.6090026478375993, "grad_norm": 0.38802462816238403, "learning_rate": 8.333432967048339e-06, "loss": 0.2379, "step": 2760 }, { "epoch": 0.6101059135039718, "grad_norm": 0.3992108404636383, "learning_rate": 8.326628500827826e-06, "loss": 0.2338, "step": 2765 }, { "epoch": 0.6112091791703442, "grad_norm": 0.44411781430244446, "learning_rate": 8.319812962312662e-06, "loss": 0.2301, "step": 2770 }, { "epoch": 0.6123124448367167, "grad_norm": 0.42220309376716614, "learning_rate": 8.312986374187563e-06, "loss": 0.238, "step": 2775 }, { "epoch": 0.6134157105030892, "grad_norm": 0.47081899642944336, "learning_rate": 8.306148759174036e-06, "loss": 0.2536, "step": 2780 }, { "epoch": 0.6145189761694616, "grad_norm": 0.4740568995475769, "learning_rate": 8.299300140030283e-06, "loss": 0.2494, "step": 2785 }, { "epoch": 0.615622241835834, "grad_norm": 0.41311606764793396, "learning_rate": 8.292440539551132e-06, "loss": 0.2443, "step": 2790 }, { "epoch": 0.6167255075022066, "grad_norm": 0.43672001361846924, "learning_rate": 8.285569980567965e-06, "loss": 0.2386, "step": 2795 }, { "epoch": 0.617828773168579, "grad_norm": 0.45961281657218933, "learning_rate": 8.278688485948634e-06, "loss": 0.2471, "step": 2800 }, { "epoch": 0.617828773168579, "eval_loss": 0.24707245826721191, "eval_runtime": 271.8779, "eval_samples_per_second": 56.139, "eval_steps_per_second": 7.018, "step": 2800 }, { "epoch": 0.6189320388349514, "grad_norm": 0.41084882616996765, "learning_rate": 8.27179607859739e-06, "loss": 0.2353, "step": 2805 }, { "epoch": 0.6200353045013239, "grad_norm": 0.40242356061935425, "learning_rate": 8.264892781454807e-06, "loss": 0.2259, "step": 2810 }, { "epoch": 0.6211385701676964, "grad_norm": 0.43410569429397583, "learning_rate": 8.257978617497706e-06, "loss": 0.2375, "step": 2815 }, { "epoch": 0.6222418358340689, "grad_norm": 0.49452638626098633, "learning_rate": 8.25105360973907e-06, "loss": 0.2472, "step": 2820 }, { "epoch": 0.6233451015004413, "grad_norm": 0.5670908689498901, "learning_rate": 8.244117781227982e-06, "loss": 0.2434, "step": 2825 }, { "epoch": 0.6244483671668137, "grad_norm": 0.4820459485054016, "learning_rate": 8.237171155049539e-06, "loss": 0.2393, "step": 2830 }, { "epoch": 0.6255516328331863, "grad_norm": 0.455879271030426, "learning_rate": 8.230213754324773e-06, "loss": 0.2269, "step": 2835 }, { "epoch": 0.6266548984995587, "grad_norm": 0.41106078028678894, "learning_rate": 8.22324560221058e-06, "loss": 0.2291, "step": 2840 }, { "epoch": 0.6277581641659311, "grad_norm": 0.39285704493522644, "learning_rate": 8.216266721899642e-06, "loss": 0.2357, "step": 2845 }, { "epoch": 0.6288614298323036, "grad_norm": 0.3971237242221832, "learning_rate": 8.209277136620348e-06, "loss": 0.2444, "step": 2850 }, { "epoch": 0.6299646954986761, "grad_norm": 0.47871458530426025, "learning_rate": 8.202276869636713e-06, "loss": 0.2357, "step": 2855 }, { "epoch": 0.6310679611650486, "grad_norm": 0.4452441930770874, "learning_rate": 8.195265944248315e-06, "loss": 0.237, "step": 2860 }, { "epoch": 0.632171226831421, "grad_norm": 0.5275673866271973, "learning_rate": 8.188244383790196e-06, "loss": 0.2536, "step": 2865 }, { "epoch": 0.6332744924977934, "grad_norm": 0.46344441175460815, "learning_rate": 8.1812122116328e-06, "loss": 0.2437, "step": 2870 }, { "epoch": 0.634377758164166, "grad_norm": 0.4381890892982483, "learning_rate": 8.174169451181893e-06, "loss": 0.2488, "step": 2875 }, { "epoch": 0.6354810238305384, "grad_norm": 0.41612380743026733, "learning_rate": 8.167116125878483e-06, "loss": 0.239, "step": 2880 }, { "epoch": 0.6365842894969108, "grad_norm": 0.4481007754802704, "learning_rate": 8.160052259198737e-06, "loss": 0.2395, "step": 2885 }, { "epoch": 0.6376875551632833, "grad_norm": 0.42456308007240295, "learning_rate": 8.152977874653909e-06, "loss": 0.2303, "step": 2890 }, { "epoch": 0.6387908208296558, "grad_norm": 0.4462544322013855, "learning_rate": 8.145892995790269e-06, "loss": 0.2476, "step": 2895 }, { "epoch": 0.6398940864960282, "grad_norm": 0.37666749954223633, "learning_rate": 8.138797646189e-06, "loss": 0.2326, "step": 2900 }, { "epoch": 0.6398940864960282, "eval_loss": 0.24663805961608887, "eval_runtime": 271.9733, "eval_samples_per_second": 56.119, "eval_steps_per_second": 7.015, "step": 2900 }, { "epoch": 0.6409973521624007, "grad_norm": 0.4413769841194153, "learning_rate": 8.131691849466154e-06, "loss": 0.2347, "step": 2905 }, { "epoch": 0.6421006178287731, "grad_norm": 0.5462684035301208, "learning_rate": 8.12457562927254e-06, "loss": 0.2491, "step": 2910 }, { "epoch": 0.6432038834951457, "grad_norm": 0.47332948446273804, "learning_rate": 8.117449009293668e-06, "loss": 0.2387, "step": 2915 }, { "epoch": 0.6443071491615181, "grad_norm": 0.4280896484851837, "learning_rate": 8.11031201324966e-06, "loss": 0.2347, "step": 2920 }, { "epoch": 0.6454104148278905, "grad_norm": 0.46633732318878174, "learning_rate": 8.103164664895179e-06, "loss": 0.2528, "step": 2925 }, { "epoch": 0.646513680494263, "grad_norm": 0.45853593945503235, "learning_rate": 8.096006988019331e-06, "loss": 0.2329, "step": 2930 }, { "epoch": 0.6476169461606355, "grad_norm": 0.4461853802204132, "learning_rate": 8.088839006445615e-06, "loss": 0.2436, "step": 2935 }, { "epoch": 0.6487202118270079, "grad_norm": 0.4614443778991699, "learning_rate": 8.081660744031818e-06, "loss": 0.2442, "step": 2940 }, { "epoch": 0.6498234774933804, "grad_norm": 0.4097602367401123, "learning_rate": 8.074472224669952e-06, "loss": 0.2398, "step": 2945 }, { "epoch": 0.6509267431597529, "grad_norm": 0.5019506216049194, "learning_rate": 8.067273472286158e-06, "loss": 0.2488, "step": 2950 }, { "epoch": 0.6520300088261254, "grad_norm": 0.4480745196342468, "learning_rate": 8.060064510840648e-06, "loss": 0.2268, "step": 2955 }, { "epoch": 0.6531332744924978, "grad_norm": 0.44799327850341797, "learning_rate": 8.052845364327609e-06, "loss": 0.2407, "step": 2960 }, { "epoch": 0.6542365401588702, "grad_norm": 0.4316900670528412, "learning_rate": 8.045616056775124e-06, "loss": 0.2449, "step": 2965 }, { "epoch": 0.6553398058252428, "grad_norm": 0.42375341057777405, "learning_rate": 8.038376612245104e-06, "loss": 0.2363, "step": 2970 }, { "epoch": 0.6564430714916152, "grad_norm": 0.48923903703689575, "learning_rate": 8.031127054833192e-06, "loss": 0.2409, "step": 2975 }, { "epoch": 0.6575463371579876, "grad_norm": 0.41415655612945557, "learning_rate": 8.023867408668692e-06, "loss": 0.2335, "step": 2980 }, { "epoch": 0.6586496028243601, "grad_norm": 0.47558680176734924, "learning_rate": 8.016597697914492e-06, "loss": 0.2485, "step": 2985 }, { "epoch": 0.6597528684907326, "grad_norm": 0.4345654845237732, "learning_rate": 8.009317946766975e-06, "loss": 0.2445, "step": 2990 }, { "epoch": 0.660856134157105, "grad_norm": 0.4340679347515106, "learning_rate": 8.002028179455941e-06, "loss": 0.2403, "step": 2995 }, { "epoch": 0.6619593998234775, "grad_norm": 0.45394837856292725, "learning_rate": 7.994728420244533e-06, "loss": 0.2516, "step": 3000 }, { "epoch": 0.6619593998234775, "eval_loss": 0.24612218141555786, "eval_runtime": 271.5712, "eval_samples_per_second": 56.203, "eval_steps_per_second": 7.026, "step": 3000 }, { "epoch": 0.6630626654898499, "grad_norm": 0.4266812205314636, "learning_rate": 7.987418693429145e-06, "loss": 0.2421, "step": 3005 }, { "epoch": 0.6641659311562225, "grad_norm": 0.44489166140556335, "learning_rate": 7.98009902333935e-06, "loss": 0.2249, "step": 3010 }, { "epoch": 0.6652691968225949, "grad_norm": 0.3864096701145172, "learning_rate": 7.972769434337815e-06, "loss": 0.238, "step": 3015 }, { "epoch": 0.6663724624889673, "grad_norm": 0.5136005878448486, "learning_rate": 7.965429950820222e-06, "loss": 0.233, "step": 3020 }, { "epoch": 0.6674757281553398, "grad_norm": 0.4214404225349426, "learning_rate": 7.958080597215187e-06, "loss": 0.2382, "step": 3025 }, { "epoch": 0.6685789938217123, "grad_norm": 0.3963940441608429, "learning_rate": 7.95072139798417e-06, "loss": 0.2413, "step": 3030 }, { "epoch": 0.6696822594880847, "grad_norm": 0.46489056944847107, "learning_rate": 7.943352377621414e-06, "loss": 0.2405, "step": 3035 }, { "epoch": 0.6707855251544572, "grad_norm": 0.47178593277931213, "learning_rate": 7.935973560653838e-06, "loss": 0.2347, "step": 3040 }, { "epoch": 0.6718887908208296, "grad_norm": 0.43894341588020325, "learning_rate": 7.928584971640974e-06, "loss": 0.2443, "step": 3045 }, { "epoch": 0.6729920564872022, "grad_norm": 0.39241307973861694, "learning_rate": 7.92118663517488e-06, "loss": 0.249, "step": 3050 }, { "epoch": 0.6740953221535746, "grad_norm": 0.4197879731655121, "learning_rate": 7.913778575880054e-06, "loss": 0.2229, "step": 3055 }, { "epoch": 0.675198587819947, "grad_norm": 0.4493717551231384, "learning_rate": 7.906360818413354e-06, "loss": 0.2385, "step": 3060 }, { "epoch": 0.6763018534863195, "grad_norm": 0.4284787178039551, "learning_rate": 7.898933387463924e-06, "loss": 0.2398, "step": 3065 }, { "epoch": 0.677405119152692, "grad_norm": 0.4555470943450928, "learning_rate": 7.891496307753099e-06, "loss": 0.2395, "step": 3070 }, { "epoch": 0.6785083848190644, "grad_norm": 0.4759092628955841, "learning_rate": 7.884049604034331e-06, "loss": 0.2441, "step": 3075 }, { "epoch": 0.6796116504854369, "grad_norm": 0.421332448720932, "learning_rate": 7.876593301093104e-06, "loss": 0.2416, "step": 3080 }, { "epoch": 0.6807149161518093, "grad_norm": 0.5813837051391602, "learning_rate": 7.869127423746852e-06, "loss": 0.2387, "step": 3085 }, { "epoch": 0.6818181818181818, "grad_norm": 0.4229418933391571, "learning_rate": 7.861651996844877e-06, "loss": 0.2359, "step": 3090 }, { "epoch": 0.6829214474845543, "grad_norm": 0.4013502597808838, "learning_rate": 7.854167045268265e-06, "loss": 0.2408, "step": 3095 }, { "epoch": 0.6840247131509267, "grad_norm": 0.46940287947654724, "learning_rate": 7.8466725939298e-06, "loss": 0.2254, "step": 3100 }, { "epoch": 0.6840247131509267, "eval_loss": 0.24580919742584229, "eval_runtime": 268.9469, "eval_samples_per_second": 56.751, "eval_steps_per_second": 7.094, "step": 3100 }, { "epoch": 0.6851279788172993, "grad_norm": 0.43689653277397156, "learning_rate": 7.839168667773891e-06, "loss": 0.248, "step": 3105 }, { "epoch": 0.6862312444836717, "grad_norm": 0.4291225075721741, "learning_rate": 7.831655291776484e-06, "loss": 0.2554, "step": 3110 }, { "epoch": 0.6873345101500441, "grad_norm": 0.3945559859275818, "learning_rate": 7.824132490944968e-06, "loss": 0.229, "step": 3115 }, { "epoch": 0.6884377758164166, "grad_norm": 0.4158150553703308, "learning_rate": 7.81660029031811e-06, "loss": 0.2422, "step": 3120 }, { "epoch": 0.689541041482789, "grad_norm": 0.4768913984298706, "learning_rate": 7.809058714965962e-06, "loss": 0.2384, "step": 3125 }, { "epoch": 0.6906443071491615, "grad_norm": 0.4020480811595917, "learning_rate": 7.801507789989775e-06, "loss": 0.2327, "step": 3130 }, { "epoch": 0.691747572815534, "grad_norm": 0.4478599429130554, "learning_rate": 7.793947540521922e-06, "loss": 0.2507, "step": 3135 }, { "epoch": 0.6928508384819064, "grad_norm": 0.4232751727104187, "learning_rate": 7.786377991725813e-06, "loss": 0.2451, "step": 3140 }, { "epoch": 0.693954104148279, "grad_norm": 0.4200434982776642, "learning_rate": 7.778799168795804e-06, "loss": 0.2394, "step": 3145 }, { "epoch": 0.6950573698146514, "grad_norm": 0.5268550515174866, "learning_rate": 7.771211096957125e-06, "loss": 0.2328, "step": 3150 }, { "epoch": 0.6961606354810238, "grad_norm": 0.48702338337898254, "learning_rate": 7.763613801465785e-06, "loss": 0.2417, "step": 3155 }, { "epoch": 0.6972639011473963, "grad_norm": 0.4694213569164276, "learning_rate": 7.756007307608498e-06, "loss": 0.2505, "step": 3160 }, { "epoch": 0.6983671668137688, "grad_norm": 0.40955081582069397, "learning_rate": 7.748391640702588e-06, "loss": 0.2401, "step": 3165 }, { "epoch": 0.6994704324801412, "grad_norm": 0.45171940326690674, "learning_rate": 7.740766826095918e-06, "loss": 0.23, "step": 3170 }, { "epoch": 0.7005736981465137, "grad_norm": 0.4558034837245941, "learning_rate": 7.733132889166788e-06, "loss": 0.2417, "step": 3175 }, { "epoch": 0.7016769638128861, "grad_norm": 0.4197918474674225, "learning_rate": 7.725489855323869e-06, "loss": 0.2432, "step": 3180 }, { "epoch": 0.7027802294792586, "grad_norm": 0.4640055000782013, "learning_rate": 7.717837750006106e-06, "loss": 0.2387, "step": 3185 }, { "epoch": 0.7038834951456311, "grad_norm": 0.41201338171958923, "learning_rate": 7.710176598682639e-06, "loss": 0.2253, "step": 3190 }, { "epoch": 0.7049867608120035, "grad_norm": 0.4629385769367218, "learning_rate": 7.702506426852715e-06, "loss": 0.2473, "step": 3195 }, { "epoch": 0.706090026478376, "grad_norm": 0.41098853945732117, "learning_rate": 7.694827260045608e-06, "loss": 0.2454, "step": 3200 }, { "epoch": 0.706090026478376, "eval_loss": 0.24525980651378632, "eval_runtime": 270.0878, "eval_samples_per_second": 56.511, "eval_steps_per_second": 7.064, "step": 3200 }, { "epoch": 0.7071932921447485, "grad_norm": 0.43668264150619507, "learning_rate": 7.687139123820526e-06, "loss": 0.2469, "step": 3205 }, { "epoch": 0.7082965578111209, "grad_norm": 0.3744730055332184, "learning_rate": 7.679442043766534e-06, "loss": 0.2336, "step": 3210 }, { "epoch": 0.7093998234774934, "grad_norm": 0.41323620080947876, "learning_rate": 7.671736045502462e-06, "loss": 0.2459, "step": 3215 }, { "epoch": 0.7105030891438658, "grad_norm": 0.4456137716770172, "learning_rate": 7.664021154676828e-06, "loss": 0.2491, "step": 3220 }, { "epoch": 0.7116063548102383, "grad_norm": 0.4377335011959076, "learning_rate": 7.656297396967747e-06, "loss": 0.2395, "step": 3225 }, { "epoch": 0.7127096204766108, "grad_norm": 0.4323022961616516, "learning_rate": 7.648564798082842e-06, "loss": 0.2403, "step": 3230 }, { "epoch": 0.7138128861429832, "grad_norm": 0.4660840630531311, "learning_rate": 7.640823383759169e-06, "loss": 0.2532, "step": 3235 }, { "epoch": 0.7149161518093556, "grad_norm": 0.4233240485191345, "learning_rate": 7.63307317976312e-06, "loss": 0.24, "step": 3240 }, { "epoch": 0.7160194174757282, "grad_norm": 0.48266658186912537, "learning_rate": 7.625314211890342e-06, "loss": 0.2426, "step": 3245 }, { "epoch": 0.7171226831421006, "grad_norm": 0.3970603346824646, "learning_rate": 7.617546505965658e-06, "loss": 0.2278, "step": 3250 }, { "epoch": 0.7182259488084731, "grad_norm": 0.4572686553001404, "learning_rate": 7.609770087842969e-06, "loss": 0.242, "step": 3255 }, { "epoch": 0.7193292144748455, "grad_norm": 0.40155845880508423, "learning_rate": 7.601984983405173e-06, "loss": 0.2285, "step": 3260 }, { "epoch": 0.720432480141218, "grad_norm": 0.4204133152961731, "learning_rate": 7.594191218564084e-06, "loss": 0.2353, "step": 3265 }, { "epoch": 0.7215357458075905, "grad_norm": 0.4386295676231384, "learning_rate": 7.586388819260338e-06, "loss": 0.2228, "step": 3270 }, { "epoch": 0.7226390114739629, "grad_norm": 0.43028250336647034, "learning_rate": 7.57857781146331e-06, "loss": 0.2328, "step": 3275 }, { "epoch": 0.7237422771403353, "grad_norm": 0.4137963056564331, "learning_rate": 7.5707582211710265e-06, "loss": 0.2406, "step": 3280 }, { "epoch": 0.7248455428067079, "grad_norm": 0.3798144459724426, "learning_rate": 7.562930074410084e-06, "loss": 0.2425, "step": 3285 }, { "epoch": 0.7259488084730803, "grad_norm": 0.3895861804485321, "learning_rate": 7.555093397235553e-06, "loss": 0.2312, "step": 3290 }, { "epoch": 0.7270520741394528, "grad_norm": 0.44680020213127136, "learning_rate": 7.5472482157308975e-06, "loss": 0.23, "step": 3295 }, { "epoch": 0.7281553398058253, "grad_norm": 0.4767349064350128, "learning_rate": 7.539394556007892e-06, "loss": 0.2482, "step": 3300 }, { "epoch": 0.7281553398058253, "eval_loss": 0.24486024677753448, "eval_runtime": 269.7229, "eval_samples_per_second": 56.588, "eval_steps_per_second": 7.074, "step": 3300 }, { "epoch": 0.7292586054721977, "grad_norm": 0.41484782099723816, "learning_rate": 7.531532444206524e-06, "loss": 0.2333, "step": 3305 }, { "epoch": 0.7303618711385702, "grad_norm": 0.4434278607368469, "learning_rate": 7.523661906494913e-06, "loss": 0.2393, "step": 3310 }, { "epoch": 0.7314651368049426, "grad_norm": 0.44898828864097595, "learning_rate": 7.515782969069229e-06, "loss": 0.2342, "step": 3315 }, { "epoch": 0.732568402471315, "grad_norm": 0.4551832973957062, "learning_rate": 7.507895658153594e-06, "loss": 0.2459, "step": 3320 }, { "epoch": 0.7336716681376876, "grad_norm": 0.464346706867218, "learning_rate": 7.500000000000001e-06, "loss": 0.2178, "step": 3325 }, { "epoch": 0.73477493380406, "grad_norm": 0.43785154819488525, "learning_rate": 7.492096020888227e-06, "loss": 0.2378, "step": 3330 }, { "epoch": 0.7358781994704324, "grad_norm": 0.4068206548690796, "learning_rate": 7.484183747125743e-06, "loss": 0.2302, "step": 3335 }, { "epoch": 0.736981465136805, "grad_norm": 0.40867307782173157, "learning_rate": 7.476263205047629e-06, "loss": 0.2403, "step": 3340 }, { "epoch": 0.7380847308031774, "grad_norm": 0.4066210687160492, "learning_rate": 7.468334421016486e-06, "loss": 0.2334, "step": 3345 }, { "epoch": 0.7391879964695499, "grad_norm": 0.43838587403297424, "learning_rate": 7.460397421422346e-06, "loss": 0.231, "step": 3350 }, { "epoch": 0.7402912621359223, "grad_norm": 0.4866897761821747, "learning_rate": 7.452452232682585e-06, "loss": 0.2513, "step": 3355 }, { "epoch": 0.7413945278022948, "grad_norm": 0.5071978569030762, "learning_rate": 7.444498881241835e-06, "loss": 0.2495, "step": 3360 }, { "epoch": 0.7424977934686673, "grad_norm": 0.5105838775634766, "learning_rate": 7.4365373935719e-06, "loss": 0.242, "step": 3365 }, { "epoch": 0.7436010591350397, "grad_norm": 0.513029932975769, "learning_rate": 7.428567796171662e-06, "loss": 0.2468, "step": 3370 }, { "epoch": 0.7447043248014121, "grad_norm": 0.396241158246994, "learning_rate": 7.420590115566995e-06, "loss": 0.2283, "step": 3375 }, { "epoch": 0.7458075904677847, "grad_norm": 0.43795159459114075, "learning_rate": 7.412604378310677e-06, "loss": 0.2304, "step": 3380 }, { "epoch": 0.7469108561341571, "grad_norm": 0.42680585384368896, "learning_rate": 7.4046106109823045e-06, "loss": 0.2294, "step": 3385 }, { "epoch": 0.7480141218005296, "grad_norm": 0.5321316123008728, "learning_rate": 7.3966088401881975e-06, "loss": 0.2378, "step": 3390 }, { "epoch": 0.749117387466902, "grad_norm": 0.4479999244213104, "learning_rate": 7.388599092561315e-06, "loss": 0.2333, "step": 3395 }, { "epoch": 0.7502206531332745, "grad_norm": 0.5410358309745789, "learning_rate": 7.380581394761169e-06, "loss": 0.2429, "step": 3400 }, { "epoch": 0.7502206531332745, "eval_loss": 0.24444225430488586, "eval_runtime": 273.7667, "eval_samples_per_second": 55.752, "eval_steps_per_second": 6.969, "step": 3400 } ], "logging_steps": 5, "max_steps": 9064, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.662905189123752e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }