{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 387, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07759456838021339, "grad_norm": 1.9683642394428182, "learning_rate": 2.307692307692308e-06, "loss": 0.7343237400054932, "step": 10 }, { "epoch": 0.15518913676042678, "grad_norm": 1.4175428237350762, "learning_rate": 4.871794871794872e-06, "loss": 0.5461452007293701, "step": 20 }, { "epoch": 0.23278370514064015, "grad_norm": 0.5442834252561063, "learning_rate": 7.435897435897437e-06, "loss": 0.3490773677825928, "step": 30 }, { "epoch": 0.31037827352085356, "grad_norm": 0.32322946422972365, "learning_rate": 1e-05, "loss": 0.2592954635620117, "step": 40 }, { "epoch": 0.3879728419010669, "grad_norm": 0.24901563193155196, "learning_rate": 9.979639600327522e-06, "loss": 0.2136533737182617, "step": 50 }, { "epoch": 0.4655674102812803, "grad_norm": 0.2047675084448879, "learning_rate": 9.918724219660013e-06, "loss": 0.18301695585250854, "step": 60 }, { "epoch": 0.5431619786614937, "grad_norm": 0.1694310997257767, "learning_rate": 9.817749962596115e-06, "loss": 0.16246029138565063, "step": 70 }, { "epoch": 0.6207565470417071, "grad_norm": 0.22587456656054467, "learning_rate": 9.677539179628005e-06, "loss": 0.14934264421463012, "step": 80 }, { "epoch": 0.6983511154219205, "grad_norm": 0.22154973989105028, "learning_rate": 9.499233769787534e-06, "loss": 0.134801185131073, "step": 90 }, { "epoch": 0.7759456838021338, "grad_norm": 0.2099862635469814, "learning_rate": 9.284285880837947e-06, "loss": 0.13017673492431642, "step": 100 }, { "epoch": 0.8535402521823472, "grad_norm": 0.32230657820182124, "learning_rate": 9.034446082750352e-06, "loss": 0.12214579582214355, "step": 110 }, { "epoch": 0.9311348205625606, "grad_norm": 0.324253054340729, "learning_rate": 8.751749110782013e-06, "loss": 0.12026152610778809, "step": 120 }, { "epoch": 1.0077594568380213, "grad_norm": 0.20488241588612174, "learning_rate": 8.438497294267117e-06, "loss": 0.11126101016998291, "step": 130 }, { "epoch": 1.0853540252182348, "grad_norm": 0.20661218086124847, "learning_rate": 8.097241806078616e-06, "loss": 0.10776399374008179, "step": 140 }, { "epoch": 1.162948593598448, "grad_norm": 0.25468202960165104, "learning_rate": 7.730761885468486e-06, "loss": 0.10431833267211914, "step": 150 }, { "epoch": 1.2405431619786615, "grad_norm": 0.17930064486716413, "learning_rate": 7.342042203498952e-06, "loss": 0.10304663181304932, "step": 160 }, { "epoch": 1.3181377303588748, "grad_norm": 0.20225538073749422, "learning_rate": 6.934248555404197e-06, "loss": 0.09784629344940185, "step": 170 }, { "epoch": 1.3957322987390883, "grad_norm": 0.2256721972453044, "learning_rate": 6.510702077847864e-06, "loss": 0.09537227749824524, "step": 180 }, { "epoch": 1.4733268671193016, "grad_norm": 0.21487787771920072, "learning_rate": 6.074852201055121e-06, "loss": 0.09520423412322998, "step": 190 }, { "epoch": 1.5509214354995149, "grad_norm": 0.17540761321861204, "learning_rate": 5.630248556101448e-06, "loss": 0.09088362455368042, "step": 200 }, { "epoch": 1.6285160038797284, "grad_norm": 0.21743503130668765, "learning_rate": 5.180512066149682e-06, "loss": 0.0899280071258545, "step": 210 }, { "epoch": 1.706110572259942, "grad_norm": 0.20331687416060285, "learning_rate": 4.729305457072913e-06, "loss": 0.0881616234779358, "step": 220 }, { "epoch": 1.7837051406401552, "grad_norm": 0.15781467110120098, "learning_rate": 4.280303427629404e-06, "loss": 0.08638249635696411, "step": 230 }, { "epoch": 1.8612997090203685, "grad_norm": 0.1623620489054104, "learning_rate": 3.8371627221284495e-06, "loss": 0.08716154098510742, "step": 240 }, { "epoch": 1.938894277400582, "grad_norm": 0.15611783173066054, "learning_rate": 3.403492349320101e-06, "loss": 0.08580605983734131, "step": 250 }, { "epoch": 2.0155189136760425, "grad_norm": 0.15287072067575233, "learning_rate": 2.982824190050958e-06, "loss": 0.08316840529441834, "step": 260 }, { "epoch": 2.093113482056256, "grad_norm": 0.1853136112632167, "learning_rate": 2.5785842330619038e-06, "loss": 0.08091338872909545, "step": 270 }, { "epoch": 2.1707080504364695, "grad_norm": 0.14114872525549504, "learning_rate": 2.1940646731880887e-06, "loss": 0.08085420131683349, "step": 280 }, { "epoch": 2.248302618816683, "grad_norm": 0.13643528182686213, "learning_rate": 1.8323970991978823e-06, "loss": 0.08156624436378479, "step": 290 }, { "epoch": 2.325897187196896, "grad_norm": 0.14573681730374075, "learning_rate": 1.4965269896332884e-06, "loss": 0.0808843195438385, "step": 300 }, { "epoch": 2.4034917555771096, "grad_norm": 0.1466398992341211, "learning_rate": 1.1891897243618184e-06, "loss": 0.07979943156242371, "step": 310 }, { "epoch": 2.481086323957323, "grad_norm": 0.12798260710398743, "learning_rate": 9.128883072055411e-07, "loss": 0.08049517869949341, "step": 320 }, { "epoch": 2.558680892337536, "grad_norm": 0.13826353734235647, "learning_rate": 6.698729810778065e-07, "loss": 0.08011389374732972, "step": 330 }, { "epoch": 2.6362754607177497, "grad_norm": 0.1305401343538733, "learning_rate": 4.6212290164521554e-07, "loss": 0.08163015246391296, "step": 340 }, { "epoch": 2.713870029097963, "grad_norm": 0.12804004522045906, "learning_rate": 2.9133001876746004e-07, "loss": 0.08051948547363282, "step": 350 }, { "epoch": 2.7914645974781767, "grad_norm": 0.12808224007612634, "learning_rate": 1.5888529698718347e-07, "loss": 0.07719261646270752, "step": 360 }, { "epoch": 2.86905916585839, "grad_norm": 0.12117673381149041, "learning_rate": 6.58673872923693e-08, "loss": 0.08128957152366638, "step": 370 }, { "epoch": 2.946653734238603, "grad_norm": 0.124324493318766, "learning_rate": 1.3033842410251074e-08, "loss": 0.07743191719055176, "step": 380 }, { "epoch": 3.0, "step": 387, "total_flos": 3081875480379392.0, "train_loss": 0.06056562058377327, "train_runtime": 29609.547, "train_samples_per_second": 6.685, "train_steps_per_second": 0.013 } ], "logging_steps": 10, "max_steps": 387, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3081875480379392.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }