{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.469850121993726, "eval_steps": 500, "global_step": 2109, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006971070059254096, "grad_norm": 5.6915082931518555, "learning_rate": 0.0, "loss": 1.5972, "step": 1 }, { "epoch": 0.03485535029627048, "grad_norm": 3.605487108230591, "learning_rate": 1.9999434046461045e-05, "loss": 1.348, "step": 50 }, { "epoch": 0.06971070059254096, "grad_norm": 3.076913595199585, "learning_rate": 1.996249692618611e-05, "loss": 1.1239, "step": 100 }, { "epoch": 0.10456605088881143, "grad_norm": 3.1423285007476807, "learning_rate": 1.9868053167196865e-05, "loss": 1.0576, "step": 150 }, { "epoch": 0.13942140118508192, "grad_norm": 2.7687857151031494, "learning_rate": 1.971664792831919e-05, "loss": 1.0202, "step": 200 }, { "epoch": 0.17427675148135238, "grad_norm": 2.9235846996307373, "learning_rate": 1.9509155167802316e-05, "loss": 0.9927, "step": 250 }, { "epoch": 0.20913210177762287, "grad_norm": 2.6419451236724854, "learning_rate": 1.9246772598559302e-05, "loss": 0.9707, "step": 300 }, { "epoch": 0.24398745207389333, "grad_norm": 2.7948801517486572, "learning_rate": 1.8931014774594656e-05, "loss": 0.9556, "step": 350 }, { "epoch": 0.27884280237016384, "grad_norm": 2.7692606449127197, "learning_rate": 1.8563704348526337e-05, "loss": 0.9504, "step": 400 }, { "epoch": 0.3136981526664343, "grad_norm": 2.7550599575042725, "learning_rate": 1.8146961550666525e-05, "loss": 0.9416, "step": 450 }, { "epoch": 0.34855350296270476, "grad_norm": 2.628190517425537, "learning_rate": 1.7683191950391142e-05, "loss": 0.9246, "step": 500 }, { "epoch": 0.3834088532589753, "grad_norm": 2.7417476177215576, "learning_rate": 1.717507257044331e-05, "loss": 0.9055, "step": 550 }, { "epoch": 0.41826420355524574, "grad_norm": 2.638144016265869, "learning_rate": 1.6625536434323358e-05, "loss": 0.9031, "step": 600 }, { "epoch": 0.4531195538515162, "grad_norm": 2.6510379314422607, "learning_rate": 1.6037755635962587e-05, "loss": 0.9003, "step": 650 }, { "epoch": 0.48797490414778666, "grad_norm": 2.6764113903045654, "learning_rate": 1.5415123029408046e-05, "loss": 0.8829, "step": 700 }, { "epoch": 0.5228302544440572, "grad_norm": 2.693927764892578, "learning_rate": 1.4761232644210963e-05, "loss": 0.8813, "step": 750 }, { "epoch": 0.5576856047403277, "grad_norm": 2.5667612552642822, "learning_rate": 1.4079858939567557e-05, "loss": 0.8739, "step": 800 }, { "epoch": 0.5925409550365981, "grad_norm": 2.4323246479034424, "learning_rate": 1.3374935016963595e-05, "loss": 0.8617, "step": 850 }, { "epoch": 0.6273963053328686, "grad_norm": 2.5843608379364014, "learning_rate": 1.2650529917086232e-05, "loss": 0.8637, "step": 900 }, { "epoch": 0.6622516556291391, "grad_norm": 2.498666286468506, "learning_rate": 1.1910825132052356e-05, "loss": 0.8522, "step": 950 }, { "epoch": 0.6971070059254095, "grad_norm": 2.595188617706299, "learning_rate": 1.1160090468532266e-05, "loss": 0.8607, "step": 1000 }, { "epoch": 0.73196235622168, "grad_norm": 2.4613826274871826, "learning_rate": 1.0402659401094154e-05, "loss": 0.8424, "step": 1050 }, { "epoch": 0.7668177065179506, "grad_norm": 2.597670078277588, "learning_rate": 9.642904058037667e-06, "loss": 0.8387, "step": 1100 }, { "epoch": 0.801673056814221, "grad_norm": 2.4956326484680176, "learning_rate": 8.885209984106072e-06, "loss": 0.83, "step": 1150 }, { "epoch": 0.8365284071104915, "grad_norm": 2.4177091121673584, "learning_rate": 8.133950825754511e-06, "loss": 0.84, "step": 1200 }, { "epoch": 0.8713837574067619, "grad_norm": 2.42484712600708, "learning_rate": 7.393463085098886e-06, "loss": 0.8304, "step": 1250 }, { "epoch": 0.9062391077030324, "grad_norm": 2.5396852493286133, "learning_rate": 6.6680210882734805e-06, "loss": 0.8315, "step": 1300 }, { "epoch": 0.9410944579993029, "grad_norm": 2.5951361656188965, "learning_rate": 5.961812312687689e-06, "loss": 0.8229, "step": 1350 }, { "epoch": 0.9759498082955733, "grad_norm": 2.4725124835968018, "learning_rate": 5.278913215600714e-06, "loss": 0.815, "step": 1400 }, { "epoch": 1.0104566050888812, "grad_norm": 2.5333492755889893, "learning_rate": 4.623265703539146e-06, "loss": 0.7775, "step": 1450 }, { "epoch": 1.0453119553851515, "grad_norm": 2.530529499053955, "learning_rate": 3.998654378383361e-06, "loss": 0.693, "step": 1500 }, { "epoch": 1.080167305681422, "grad_norm": 2.500720262527466, "learning_rate": 3.408684691465355e-06, "loss": 0.7003, "step": 1550 }, { "epoch": 1.1150226559776926, "grad_norm": 2.5019266605377197, "learning_rate": 2.85676213177945e-06, "loss": 0.6936, "step": 1600 }, { "epoch": 1.149878006273963, "grad_norm": 2.41332745552063, "learning_rate": 2.3460725684379002e-06, "loss": 0.699, "step": 1650 }, { "epoch": 1.1847333565702336, "grad_norm": 2.46690034866333, "learning_rate": 1.8795638608410016e-06, "loss": 0.6873, "step": 1700 }, { "epoch": 1.219588706866504, "grad_norm": 2.2801692485809326, "learning_rate": 1.4599288427134283e-06, "loss": 0.6863, "step": 1750 }, { "epoch": 1.2544440571627744, "grad_norm": 2.466907501220703, "learning_rate": 1.0895897782283305e-06, "loss": 0.6915, "step": 1800 }, { "epoch": 1.289299407459045, "grad_norm": 2.507089376449585, "learning_rate": 7.706843799431985e-07, "loss": 0.6878, "step": 1850 }, { "epoch": 1.3241547577553154, "grad_norm": 2.3262147903442383, "learning_rate": 5.050534692564358e-07, "loss": 0.6811, "step": 1900 }, { "epoch": 1.359010108051586, "grad_norm": 2.4431302547454834, "learning_rate": 2.94230350612239e-07, "loss": 0.678, "step": 1950 }, { "epoch": 1.3938654583478565, "grad_norm": 2.3482253551483154, "learning_rate": 1.3943196078924247e-07, "loss": 0.6829, "step": 2000 }, { "epoch": 1.428720808644127, "grad_norm": 2.5430848598480225, "learning_rate": 4.155184436196669e-08, "loss": 0.6795, "step": 2050 }, { "epoch": 1.4635761589403973, "grad_norm": 2.450352430343628, "learning_rate": 1.154995882924892e-09, "loss": 0.6782, "step": 2100 }, { "epoch": 1.469850121993726, "step": 2109, "total_flos": 7.41829675403182e+16, "train_loss": 0.8431025520214372, "train_runtime": 2810.3208, "train_samples_per_second": 6.004, "train_steps_per_second": 0.75 } ], "logging_steps": 50, "max_steps": 2109, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.41829675403182e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }