{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 34265, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1459321415541773, "grad_norm": 2.2002170436333888, "learning_rate": 2.915086081120514e-06, "loss": 1.091951904296875, "step": 1000 }, { "epoch": 0.2918642831083546, "grad_norm": 2.3899237660554205, "learning_rate": 5.833090166326233e-06, "loss": 0.9010311279296875, "step": 2000 }, { "epoch": 0.43779642466253194, "grad_norm": 1.7207455491966865, "learning_rate": 8.751094251531953e-06, "loss": 0.8496372680664063, "step": 3000 }, { "epoch": 0.5837285662167092, "grad_norm": 1.49997561253202, "learning_rate": 9.991513345767592e-06, "loss": 0.8293939208984376, "step": 4000 }, { "epoch": 0.7296607077708865, "grad_norm": 1.43919873990257, "learning_rate": 9.936020028278053e-06, "loss": 0.8073157348632812, "step": 5000 }, { "epoch": 0.8755928493250639, "grad_norm": 1.384519444393501, "learning_rate": 9.829343371836088e-06, "loss": 0.7829708251953125, "step": 6000 }, { "epoch": 1.021452024808464, "grad_norm": 1.457416645249146, "learning_rate": 9.672589544454328e-06, "loss": 0.7496401977539062, "step": 7000 }, { "epoch": 1.1673841663626414, "grad_norm": 1.5133027310610572, "learning_rate": 9.46738398205746e-06, "loss": 0.6121725463867187, "step": 8000 }, { "epoch": 1.3133163079168186, "grad_norm": 1.3855612557611925, "learning_rate": 9.215854533761766e-06, "loss": 0.611597412109375, "step": 9000 }, { "epoch": 1.459248449470996, "grad_norm": 1.5802706606106216, "learning_rate": 8.920609397454381e-06, "loss": 0.6123408203125, "step": 10000 }, { "epoch": 1.6051805910251733, "grad_norm": 1.4685873474216664, "learning_rate": 8.584710074466158e-06, "loss": 0.609525390625, "step": 11000 }, { "epoch": 1.7511127325793505, "grad_norm": 1.219859265372452, "learning_rate": 8.211639623780629e-06, "loss": 0.6133668823242188, "step": 12000 }, { "epoch": 1.897044874133528, "grad_norm": 1.2535196330309815, "learning_rate": 7.805266544962458e-06, "loss": 0.615207763671875, "step": 13000 }, { "epoch": 2.042904049616928, "grad_norm": 1.5681754211680612, "learning_rate": 7.3698046643160645e-06, "loss": 0.5413321533203125, "step": 14000 }, { "epoch": 2.1888361911711054, "grad_norm": 1.2447962924993787, "learning_rate": 6.909769440229038e-06, "loss": 0.38200238037109374, "step": 15000 }, { "epoch": 2.334768332725283, "grad_norm": 1.4067667074111327, "learning_rate": 6.4299311407857035e-06, "loss": 0.38497344970703123, "step": 16000 }, { "epoch": 2.48070047427946, "grad_norm": 1.326059120500027, "learning_rate": 5.935265379168761e-06, "loss": 0.3850032653808594, "step": 17000 }, { "epoch": 2.6266326158336373, "grad_norm": 1.6923974019115828, "learning_rate": 5.430901519764892e-06, "loss": 0.38166015625, "step": 18000 }, { "epoch": 2.7725647573878147, "grad_norm": 1.5367782861850559, "learning_rate": 4.9220694899697185e-06, "loss": 0.3862608642578125, "step": 19000 }, { "epoch": 2.918496898941992, "grad_norm": 1.3054321737811847, "learning_rate": 4.414045549219315e-06, "loss": 0.38137054443359375, "step": 20000 }, { "epoch": 3.064356074425392, "grad_norm": 1.69275953279231, "learning_rate": 3.912097577588397e-06, "loss": 0.2938824462890625, "step": 21000 }, { "epoch": 3.2102882159795696, "grad_norm": 1.5240170572509457, "learning_rate": 3.4214304512770823e-06, "loss": 0.19125808715820314, "step": 22000 }, { "epoch": 3.356220357533747, "grad_norm": 1.5774125509908914, "learning_rate": 2.9471320714071095e-06, "loss": 0.18847378540039061, "step": 23000 }, { "epoch": 3.502152499087924, "grad_norm": 1.8421166872943977, "learning_rate": 2.4941206057740675e-06, "loss": 0.19000143432617186, "step": 24000 }, { "epoch": 3.6480846406421015, "grad_norm": 1.5694212047061644, "learning_rate": 2.06709349062457e-06, "loss": 0.18782159423828124, "step": 25000 }, { "epoch": 3.7940167821962785, "grad_norm": 1.496800606594422, "learning_rate": 1.6704787212769829e-06, "loss": 0.18233416748046874, "step": 26000 }, { "epoch": 3.939948923750456, "grad_norm": 1.984744582152809, "learning_rate": 1.3083889366705216e-06, "loss": 0.17911607360839843, "step": 27000 }, { "epoch": 4.085808099233856, "grad_norm": 1.2567953809414274, "learning_rate": 9.845787739562829e-07, "loss": 0.12317549133300781, "step": 28000 }, { "epoch": 4.231740240788033, "grad_norm": 1.091077915720754, "learning_rate": 7.024059353355333e-07, "loss": 0.08010615539550782, "step": 29000 }, { "epoch": 4.377672382342211, "grad_norm": 1.2460581955478622, "learning_rate": 4.64796370857008e-07, "loss": 0.08023818969726562, "step": 30000 }, { "epoch": 4.523604523896388, "grad_norm": 1.7839106451867877, "learning_rate": 2.7421393820510846e-07, "loss": 0.07947054290771484, "step": 31000 }, { "epoch": 4.669536665450566, "grad_norm": 1.1265268481087043, "learning_rate": 1.326348540874095e-07, "loss": 0.07735189056396484, "step": 32000 }, { "epoch": 4.815468807004743, "grad_norm": 1.0896372619502366, "learning_rate": 4.152720214406214e-08, "loss": 0.07909833526611328, "step": 33000 }, { "epoch": 4.96140094855892, "grad_norm": 1.2750658260191035, "learning_rate": 1.8357098688476238e-09, "loss": 0.07906644439697266, "step": 34000 }, { "epoch": 5.0, "step": 34265, "total_flos": 1091733654863872.0, "train_loss": 0.42485430567312915, "train_runtime": 114160.2816, "train_samples_per_second": 2.401, "train_steps_per_second": 0.3 } ], "logging_steps": 1000, "max_steps": 34265, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1091733654863872.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }