{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 25.771084337349397, "eval_steps": 2, "global_step": 26, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7710843373493976, "grad_norm": 6.121489677872157, "learning_rate": 6.25e-07, "loss": 0.8753013014793396, "memory(GiB)": 34.86, "step": 1, "token_acc": 0.7918330258556598, "train_speed(iter/s)": 0.009904 }, { "epoch": 1.7710843373493976, "grad_norm": 11.509424437532436, "learning_rate": 1.25e-06, "loss": 1.762073278427124, "memory(GiB)": 39.41, "step": 2, "token_acc": 0.7978716452742124, "train_speed(iter/s)": 0.00936 }, { "epoch": 2.7710843373493974, "grad_norm": 11.323944309901117, "learning_rate": 1.8750000000000003e-06, "loss": 1.7230606079101562, "memory(GiB)": 42.29, "step": 3, "token_acc": 0.7989877731008218, "train_speed(iter/s)": 0.008908 }, { "epoch": 3.7710843373493974, "grad_norm": 10.87196352874565, "learning_rate": 2.5e-06, "loss": 1.7452430725097656, "memory(GiB)": 42.29, "step": 4, "token_acc": 0.7974636739751764, "train_speed(iter/s)": 0.008888 }, { "epoch": 4.771084337349397, "grad_norm": 11.053879691862349, "learning_rate": 3.125e-06, "loss": 1.683917760848999, "memory(GiB)": 42.29, "step": 5, "token_acc": 0.7954782471812833, "train_speed(iter/s)": 0.008692 }, { "epoch": 5.771084337349397, "grad_norm": 8.606854760903413, "learning_rate": 3.7500000000000005e-06, "loss": 1.5291715860366821, "memory(GiB)": 42.29, "step": 6, "token_acc": 0.8125722279666687, "train_speed(iter/s)": 0.008684 }, { "epoch": 6.771084337349397, "grad_norm": 5.022068500005259, "learning_rate": 4.3750000000000005e-06, "loss": 1.410496473312378, "memory(GiB)": 42.29, "step": 7, "token_acc": 0.8070179394950782, "train_speed(iter/s)": 0.008583 }, { "epoch": 7.771084337349397, "grad_norm": 3.8329268469171702, "learning_rate": 5e-06, "loss": 1.1963456869125366, "memory(GiB)": 42.29, "step": 8, "token_acc": 0.8275855412383573, "train_speed(iter/s)": 0.008616 }, { "epoch": 8.771084337349398, "grad_norm": 3.848141778586357, "learning_rate": 4.987961816680493e-06, "loss": 1.1539512872695923, "memory(GiB)": 42.29, "step": 9, "token_acc": 0.8452060931899642, "train_speed(iter/s)": 0.00854 }, { "epoch": 9.771084337349398, "grad_norm": 2.977196631037463, "learning_rate": 4.9519632010080765e-06, "loss": 1.0900822877883911, "memory(GiB)": 42.29, "step": 10, "token_acc": 0.8439449530665865, "train_speed(iter/s)": 0.008583 }, { "epoch": 10.771084337349398, "grad_norm": 2.3240379853396145, "learning_rate": 4.8923508393305224e-06, "loss": 0.9584915637969971, "memory(GiB)": 42.29, "step": 11, "token_acc": 0.8541569662165658, "train_speed(iter/s)": 0.00854 }, { "epoch": 11.771084337349398, "grad_norm": 1.7059344045170224, "learning_rate": 4.809698831278217e-06, "loss": 0.9206792116165161, "memory(GiB)": 42.29, "step": 12, "token_acc": 0.8550325931866718, "train_speed(iter/s)": 0.00856 }, { "epoch": 12.771084337349398, "grad_norm": 1.7886326192292616, "learning_rate": 4.704803160870888e-06, "loss": 0.8803208470344543, "memory(GiB)": 42.29, "step": 13, "token_acc": 0.8565676850786719, "train_speed(iter/s)": 0.008514 }, { "epoch": 13.771084337349398, "grad_norm": 1.5286406890043707, "learning_rate": 4.578674030756364e-06, "loss": 0.8406718969345093, "memory(GiB)": 42.29, "step": 14, "token_acc": 0.868490055655166, "train_speed(iter/s)": 0.008553 }, { "epoch": 14.771084337349398, "grad_norm": 1.4093835831424686, "learning_rate": 4.432526133406843e-06, "loss": 0.816148042678833, "memory(GiB)": 42.29, "step": 15, "token_acc": 0.8801949289867506, "train_speed(iter/s)": 0.008514 }, { "epoch": 15.771084337349398, "grad_norm": 1.3680984858266587, "learning_rate": 4.267766952966369e-06, "loss": 0.7781298756599426, "memory(GiB)": 42.29, "step": 16, "token_acc": 0.8775519188228432, "train_speed(iter/s)": 0.008531 }, { "epoch": 16.771084337349397, "grad_norm": 0.6513969535166108, "learning_rate": 4.085983210409114e-06, "loss": 0.7328703999519348, "memory(GiB)": 42.29, "step": 17, "token_acc": 0.8854784825706624, "train_speed(iter/s)": 0.008507 }, { "epoch": 17.771084337349397, "grad_norm": 1.1321914535679016, "learning_rate": 3.888925582549006e-06, "loss": 0.7167081832885742, "memory(GiB)": 42.29, "step": 18, "token_acc": 0.8828302499188575, "train_speed(iter/s)": 0.008528 }, { "epoch": 18.771084337349397, "grad_norm": 1.1087830646957209, "learning_rate": 3.6784918420649952e-06, "loss": 0.6928962469100952, "memory(GiB)": 42.29, "step": 19, "token_acc": 0.8914687444586997, "train_speed(iter/s)": 0.008501 }, { "epoch": 19.771084337349397, "grad_norm": 1.0244408604059563, "learning_rate": 3.4567085809127247e-06, "loss": 0.6718354821205139, "memory(GiB)": 42.29, "step": 20, "token_acc": 0.8931382342286962, "train_speed(iter/s)": 0.008518 }, { "epoch": 20.771084337349397, "grad_norm": 0.9684342265578457, "learning_rate": 3.225711693136156e-06, "loss": 0.64753657579422, "memory(GiB)": 42.29, "step": 21, "token_acc": 0.898327751680115, "train_speed(iter/s)": 0.008492 }, { "epoch": 21.771084337349397, "grad_norm": 0.8695314329605501, "learning_rate": 2.9877258050403214e-06, "loss": 0.6080504655838013, "memory(GiB)": 42.29, "step": 22, "token_acc": 0.8969131371141421, "train_speed(iter/s)": 0.008511 }, { "epoch": 22.771084337349397, "grad_norm": 0.7610645886404945, "learning_rate": 2.7450428508239024e-06, "loss": 0.5871363878250122, "memory(GiB)": 42.29, "step": 23, "token_acc": 0.9013859215427465, "train_speed(iter/s)": 0.008488 }, { "epoch": 23.771084337349397, "grad_norm": 0.838874811475282, "learning_rate": 2.5e-06, "loss": 0.6137609481811523, "memory(GiB)": 42.29, "step": 24, "token_acc": 0.9088375088841507, "train_speed(iter/s)": 0.008512 }, { "epoch": 24.771084337349397, "grad_norm": 0.7953361813418657, "learning_rate": 2.2549571491760985e-06, "loss": 0.6176888942718506, "memory(GiB)": 42.29, "step": 25, "token_acc": 0.9058841092793619, "train_speed(iter/s)": 0.008488 }, { "epoch": 25.771084337349397, "grad_norm": 0.8068676839609372, "learning_rate": 2.01227419495968e-06, "loss": 0.5883712768554688, "memory(GiB)": 42.29, "step": 26, "token_acc": 0.9068480043739748, "train_speed(iter/s)": 0.008491 } ], "logging_steps": 1, "max_steps": 40, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 2, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 22664558936064.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }