{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.469850121993726, "eval_steps": 500, "global_step": 2109, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006971070059254096, "grad_norm": 2.962728261947632, "learning_rate": 0.0, "loss": 1.1855, "step": 1 }, { "epoch": 0.03485535029627048, "grad_norm": 1.953801155090332, "learning_rate": 1.9999434046461045e-05, "loss": 0.9795, "step": 50 }, { "epoch": 0.06971070059254096, "grad_norm": 1.6633161306381226, "learning_rate": 1.996249692618611e-05, "loss": 0.8103, "step": 100 }, { "epoch": 0.10456605088881143, "grad_norm": 1.8373701572418213, "learning_rate": 1.9868053167196865e-05, "loss": 0.7672, "step": 150 }, { "epoch": 0.13942140118508192, "grad_norm": 1.503631830215454, "learning_rate": 1.971664792831919e-05, "loss": 0.7452, "step": 200 }, { "epoch": 0.17427675148135238, "grad_norm": 1.5163464546203613, "learning_rate": 1.9509155167802316e-05, "loss": 0.7248, "step": 250 }, { "epoch": 0.20913210177762287, "grad_norm": 1.4677717685699463, "learning_rate": 1.9246772598559302e-05, "loss": 0.7107, "step": 300 }, { "epoch": 0.24398745207389333, "grad_norm": 1.4698961973190308, "learning_rate": 1.8931014774594656e-05, "loss": 0.7002, "step": 350 }, { "epoch": 0.27884280237016384, "grad_norm": 1.5015199184417725, "learning_rate": 1.8563704348526337e-05, "loss": 0.6954, "step": 400 }, { "epoch": 0.3136981526664343, "grad_norm": 1.4707640409469604, "learning_rate": 1.8146961550666525e-05, "loss": 0.6895, "step": 450 }, { "epoch": 0.34855350296270476, "grad_norm": 1.4305927753448486, "learning_rate": 1.7683191950391142e-05, "loss": 0.6779, "step": 500 }, { "epoch": 0.3834088532589753, "grad_norm": 1.4896072149276733, "learning_rate": 1.717507257044331e-05, "loss": 0.6669, "step": 550 }, { "epoch": 0.41826420355524574, "grad_norm": 1.4289474487304688, "learning_rate": 1.6625536434323358e-05, "loss": 0.663, "step": 600 }, { "epoch": 0.4531195538515162, "grad_norm": 1.3936972618103027, "learning_rate": 1.6037755635962587e-05, "loss": 0.6581, "step": 650 }, { "epoch": 0.48797490414778666, "grad_norm": 1.4336333274841309, "learning_rate": 1.5415123029408046e-05, "loss": 0.6487, "step": 700 }, { "epoch": 0.5228302544440572, "grad_norm": 1.4300156831741333, "learning_rate": 1.4761232644210963e-05, "loss": 0.645, "step": 750 }, { "epoch": 0.5576856047403277, "grad_norm": 1.3896256685256958, "learning_rate": 1.4079858939567557e-05, "loss": 0.6366, "step": 800 }, { "epoch": 0.5925409550365981, "grad_norm": 1.2966829538345337, "learning_rate": 1.3374935016963595e-05, "loss": 0.6314, "step": 850 }, { "epoch": 0.6273963053328686, "grad_norm": 1.304624319076538, "learning_rate": 1.2650529917086232e-05, "loss": 0.6287, "step": 900 }, { "epoch": 0.6622516556291391, "grad_norm": 1.3826805353164673, "learning_rate": 1.1910825132052356e-05, "loss": 0.6233, "step": 950 }, { "epoch": 0.6971070059254095, "grad_norm": 1.3912091255187988, "learning_rate": 1.1160090468532266e-05, "loss": 0.6255, "step": 1000 }, { "epoch": 0.73196235622168, "grad_norm": 1.2622112035751343, "learning_rate": 1.0402659401094154e-05, "loss": 0.6141, "step": 1050 }, { "epoch": 0.7668177065179506, "grad_norm": 1.3311121463775635, "learning_rate": 9.642904058037667e-06, "loss": 0.6095, "step": 1100 }, { "epoch": 0.801673056814221, "grad_norm": 1.393316626548767, "learning_rate": 8.885209984106072e-06, "loss": 0.6027, "step": 1150 }, { "epoch": 0.8365284071104915, "grad_norm": 1.3410212993621826, "learning_rate": 8.133950825754511e-06, "loss": 0.6068, "step": 1200 }, { "epoch": 0.8713837574067619, "grad_norm": 1.365545392036438, "learning_rate": 7.393463085098886e-06, "loss": 0.6012, "step": 1250 }, { "epoch": 0.9062391077030324, "grad_norm": 1.3484421968460083, "learning_rate": 6.6680210882734805e-06, "loss": 0.6005, "step": 1300 }, { "epoch": 0.9410944579993029, "grad_norm": 1.7153867483139038, "learning_rate": 5.961812312687689e-06, "loss": 0.594, "step": 1350 }, { "epoch": 0.9759498082955733, "grad_norm": 1.308719515800476, "learning_rate": 5.278913215600714e-06, "loss": 0.5852, "step": 1400 }, { "epoch": 1.0104566050888812, "grad_norm": 1.4866747856140137, "learning_rate": 4.623265703539146e-06, "loss": 0.5385, "step": 1450 }, { "epoch": 1.0453119553851515, "grad_norm": 1.4509928226470947, "learning_rate": 3.998654378383361e-06, "loss": 0.4257, "step": 1500 }, { "epoch": 1.080167305681422, "grad_norm": 1.2705004215240479, "learning_rate": 3.408684691465355e-06, "loss": 0.4253, "step": 1550 }, { "epoch": 1.1150226559776926, "grad_norm": 1.35167396068573, "learning_rate": 2.85676213177945e-06, "loss": 0.4221, "step": 1600 }, { "epoch": 1.149878006273963, "grad_norm": 1.313473105430603, "learning_rate": 2.3460725684379002e-06, "loss": 0.4244, "step": 1650 }, { "epoch": 1.1847333565702336, "grad_norm": 1.3406174182891846, "learning_rate": 1.8795638608410016e-06, "loss": 0.4162, "step": 1700 }, { "epoch": 1.219588706866504, "grad_norm": 1.263272762298584, "learning_rate": 1.4599288427134283e-06, "loss": 0.4189, "step": 1750 }, { "epoch": 1.2544440571627744, "grad_norm": 1.3713723421096802, "learning_rate": 1.0895897782283305e-06, "loss": 0.4181, "step": 1800 }, { "epoch": 1.289299407459045, "grad_norm": 1.3794862031936646, "learning_rate": 7.706843799431985e-07, "loss": 0.4166, "step": 1850 }, { "epoch": 1.3241547577553154, "grad_norm": 1.3196351528167725, "learning_rate": 5.050534692564358e-07, "loss": 0.4125, "step": 1900 }, { "epoch": 1.359010108051586, "grad_norm": 1.3097676038742065, "learning_rate": 2.94230350612239e-07, "loss": 0.4089, "step": 1950 }, { "epoch": 1.3938654583478565, "grad_norm": 1.329222559928894, "learning_rate": 1.3943196078924247e-07, "loss": 0.4135, "step": 2000 }, { "epoch": 1.428720808644127, "grad_norm": 1.3468352556228638, "learning_rate": 4.155184436196669e-08, "loss": 0.4138, "step": 2050 }, { "epoch": 1.4635761589403973, "grad_norm": 1.3396908044815063, "learning_rate": 1.154995882924892e-09, "loss": 0.4112, "step": 2100 }, { "epoch": 1.469850121993726, "step": 2109, "total_flos": 5.7513841193385984e+17, "train_loss": 0.5876018210235169, "train_runtime": 8852.0342, "train_samples_per_second": 1.906, "train_steps_per_second": 0.238 } ], "logging_steps": 50, "max_steps": 2109, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.7513841193385984e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }