{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 25.771084337349397,
  "eval_steps": 2,
  "global_step": 26,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7710843373493976,
      "grad_norm": 6.121489677872157,
      "learning_rate": 6.25e-07,
      "loss": 0.8753013014793396,
      "memory(GiB)": 34.86,
      "step": 1,
      "token_acc": 0.7918330258556598,
      "train_speed(iter/s)": 0.009904
    },
    {
      "epoch": 1.7710843373493976,
      "grad_norm": 11.509424437532436,
      "learning_rate": 1.25e-06,
      "loss": 1.762073278427124,
      "memory(GiB)": 39.41,
      "step": 2,
      "token_acc": 0.7978716452742124,
      "train_speed(iter/s)": 0.00936
    },
    {
      "epoch": 2.7710843373493974,
      "grad_norm": 11.323944309901117,
      "learning_rate": 1.8750000000000003e-06,
      "loss": 1.7230606079101562,
      "memory(GiB)": 42.29,
      "step": 3,
      "token_acc": 0.7989877731008218,
      "train_speed(iter/s)": 0.008908
    },
    {
      "epoch": 3.7710843373493974,
      "grad_norm": 10.87196352874565,
      "learning_rate": 2.5e-06,
      "loss": 1.7452430725097656,
      "memory(GiB)": 42.29,
      "step": 4,
      "token_acc": 0.7974636739751764,
      "train_speed(iter/s)": 0.008888
    },
    {
      "epoch": 4.771084337349397,
      "grad_norm": 11.053879691862349,
      "learning_rate": 3.125e-06,
      "loss": 1.683917760848999,
      "memory(GiB)": 42.29,
      "step": 5,
      "token_acc": 0.7954782471812833,
      "train_speed(iter/s)": 0.008692
    },
    {
      "epoch": 5.771084337349397,
      "grad_norm": 8.606854760903413,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 1.5291715860366821,
      "memory(GiB)": 42.29,
      "step": 6,
      "token_acc": 0.8125722279666687,
      "train_speed(iter/s)": 0.008684
    },
    {
      "epoch": 6.771084337349397,
      "grad_norm": 5.022068500005259,
      "learning_rate": 4.3750000000000005e-06,
      "loss": 1.410496473312378,
      "memory(GiB)": 42.29,
      "step": 7,
      "token_acc": 0.8070179394950782,
      "train_speed(iter/s)": 0.008583
    },
    {
      "epoch": 7.771084337349397,
      "grad_norm": 3.8329268469171702,
      "learning_rate": 5e-06,
      "loss": 1.1963456869125366,
      "memory(GiB)": 42.29,
      "step": 8,
      "token_acc": 0.8275855412383573,
      "train_speed(iter/s)": 0.008616
    },
    {
      "epoch": 8.771084337349398,
      "grad_norm": 3.848141778586357,
      "learning_rate": 4.987961816680493e-06,
      "loss": 1.1539512872695923,
      "memory(GiB)": 42.29,
      "step": 9,
      "token_acc": 0.8452060931899642,
      "train_speed(iter/s)": 0.00854
    },
    {
      "epoch": 9.771084337349398,
      "grad_norm": 2.977196631037463,
      "learning_rate": 4.9519632010080765e-06,
      "loss": 1.0900822877883911,
      "memory(GiB)": 42.29,
      "step": 10,
      "token_acc": 0.8439449530665865,
      "train_speed(iter/s)": 0.008583
    },
    {
      "epoch": 10.771084337349398,
      "grad_norm": 2.3240379853396145,
      "learning_rate": 4.8923508393305224e-06,
      "loss": 0.9584915637969971,
      "memory(GiB)": 42.29,
      "step": 11,
      "token_acc": 0.8541569662165658,
      "train_speed(iter/s)": 0.00854
    },
    {
      "epoch": 11.771084337349398,
      "grad_norm": 1.7059344045170224,
      "learning_rate": 4.809698831278217e-06,
      "loss": 0.9206792116165161,
      "memory(GiB)": 42.29,
      "step": 12,
      "token_acc": 0.8550325931866718,
      "train_speed(iter/s)": 0.00856
    },
    {
      "epoch": 12.771084337349398,
      "grad_norm": 1.7886326192292616,
      "learning_rate": 4.704803160870888e-06,
      "loss": 0.8803208470344543,
      "memory(GiB)": 42.29,
      "step": 13,
      "token_acc": 0.8565676850786719,
      "train_speed(iter/s)": 0.008514
    },
    {
      "epoch": 13.771084337349398,
      "grad_norm": 1.5286406890043707,
      "learning_rate": 4.578674030756364e-06,
      "loss": 0.8406718969345093,
      "memory(GiB)": 42.29,
      "step": 14,
      "token_acc": 0.868490055655166,
      "train_speed(iter/s)": 0.008553
    },
    {
      "epoch": 14.771084337349398,
      "grad_norm": 1.4093835831424686,
      "learning_rate": 4.432526133406843e-06,
      "loss": 0.816148042678833,
      "memory(GiB)": 42.29,
      "step": 15,
      "token_acc": 0.8801949289867506,
      "train_speed(iter/s)": 0.008514
    },
    {
      "epoch": 15.771084337349398,
      "grad_norm": 1.3680984858266587,
      "learning_rate": 4.267766952966369e-06,
      "loss": 0.7781298756599426,
      "memory(GiB)": 42.29,
      "step": 16,
      "token_acc": 0.8775519188228432,
      "train_speed(iter/s)": 0.008531
    },
    {
      "epoch": 16.771084337349397,
      "grad_norm": 0.6513969535166108,
      "learning_rate": 4.085983210409114e-06,
      "loss": 0.7328703999519348,
      "memory(GiB)": 42.29,
      "step": 17,
      "token_acc": 0.8854784825706624,
      "train_speed(iter/s)": 0.008507
    },
    {
      "epoch": 17.771084337349397,
      "grad_norm": 1.1321914535679016,
      "learning_rate": 3.888925582549006e-06,
      "loss": 0.7167081832885742,
      "memory(GiB)": 42.29,
      "step": 18,
      "token_acc": 0.8828302499188575,
      "train_speed(iter/s)": 0.008528
    },
    {
      "epoch": 18.771084337349397,
      "grad_norm": 1.1087830646957209,
      "learning_rate": 3.6784918420649952e-06,
      "loss": 0.6928962469100952,
      "memory(GiB)": 42.29,
      "step": 19,
      "token_acc": 0.8914687444586997,
      "train_speed(iter/s)": 0.008501
    },
    {
      "epoch": 19.771084337349397,
      "grad_norm": 1.0244408604059563,
      "learning_rate": 3.4567085809127247e-06,
      "loss": 0.6718354821205139,
      "memory(GiB)": 42.29,
      "step": 20,
      "token_acc": 0.8931382342286962,
      "train_speed(iter/s)": 0.008518
    },
    {
      "epoch": 20.771084337349397,
      "grad_norm": 0.9684342265578457,
      "learning_rate": 3.225711693136156e-06,
      "loss": 0.64753657579422,
      "memory(GiB)": 42.29,
      "step": 21,
      "token_acc": 0.898327751680115,
      "train_speed(iter/s)": 0.008492
    },
    {
      "epoch": 21.771084337349397,
      "grad_norm": 0.8695314329605501,
      "learning_rate": 2.9877258050403214e-06,
      "loss": 0.6080504655838013,
      "memory(GiB)": 42.29,
      "step": 22,
      "token_acc": 0.8969131371141421,
      "train_speed(iter/s)": 0.008511
    },
    {
      "epoch": 22.771084337349397,
      "grad_norm": 0.7610645886404945,
      "learning_rate": 2.7450428508239024e-06,
      "loss": 0.5871363878250122,
      "memory(GiB)": 42.29,
      "step": 23,
      "token_acc": 0.9013859215427465,
      "train_speed(iter/s)": 0.008488
    },
    {
      "epoch": 23.771084337349397,
      "grad_norm": 0.838874811475282,
      "learning_rate": 2.5e-06,
      "loss": 0.6137609481811523,
      "memory(GiB)": 42.29,
      "step": 24,
      "token_acc": 0.9088375088841507,
      "train_speed(iter/s)": 0.008512
    },
    {
      "epoch": 24.771084337349397,
      "grad_norm": 0.7953361813418657,
      "learning_rate": 2.2549571491760985e-06,
      "loss": 0.6176888942718506,
      "memory(GiB)": 42.29,
      "step": 25,
      "token_acc": 0.9058841092793619,
      "train_speed(iter/s)": 0.008488
    },
    {
      "epoch": 25.771084337349397,
      "grad_norm": 0.8068676839609372,
      "learning_rate": 2.01227419495968e-06,
      "loss": 0.5883712768554688,
      "memory(GiB)": 42.29,
      "step": 26,
      "token_acc": 0.9068480043739748,
      "train_speed(iter/s)": 0.008491
    }
  ],
  "logging_steps": 1,
  "max_steps": 40,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 40,
  "save_steps": 2,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 22664558936064.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}