{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 25.771084337349397,
"eval_steps": 2,
"global_step": 26,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7710843373493976,
"grad_norm": 6.121489677872157,
"learning_rate": 6.25e-07,
"loss": 0.8753013014793396,
"memory(GiB)": 34.86,
"step": 1,
"token_acc": 0.7918330258556598,
"train_speed(iter/s)": 0.009904
},
{
"epoch": 1.7710843373493976,
"grad_norm": 11.509424437532436,
"learning_rate": 1.25e-06,
"loss": 1.762073278427124,
"memory(GiB)": 39.41,
"step": 2,
"token_acc": 0.7978716452742124,
"train_speed(iter/s)": 0.00936
},
{
"epoch": 2.7710843373493974,
"grad_norm": 11.323944309901117,
"learning_rate": 1.8750000000000003e-06,
"loss": 1.7230606079101562,
"memory(GiB)": 42.29,
"step": 3,
"token_acc": 0.7989877731008218,
"train_speed(iter/s)": 0.008908
},
{
"epoch": 3.7710843373493974,
"grad_norm": 10.87196352874565,
"learning_rate": 2.5e-06,
"loss": 1.7452430725097656,
"memory(GiB)": 42.29,
"step": 4,
"token_acc": 0.7974636739751764,
"train_speed(iter/s)": 0.008888
},
{
"epoch": 4.771084337349397,
"grad_norm": 11.053879691862349,
"learning_rate": 3.125e-06,
"loss": 1.683917760848999,
"memory(GiB)": 42.29,
"step": 5,
"token_acc": 0.7954782471812833,
"train_speed(iter/s)": 0.008692
},
{
"epoch": 5.771084337349397,
"grad_norm": 8.606854760903413,
"learning_rate": 3.7500000000000005e-06,
"loss": 1.5291715860366821,
"memory(GiB)": 42.29,
"step": 6,
"token_acc": 0.8125722279666687,
"train_speed(iter/s)": 0.008684
},
{
"epoch": 6.771084337349397,
"grad_norm": 5.022068500005259,
"learning_rate": 4.3750000000000005e-06,
"loss": 1.410496473312378,
"memory(GiB)": 42.29,
"step": 7,
"token_acc": 0.8070179394950782,
"train_speed(iter/s)": 0.008583
},
{
"epoch": 7.771084337349397,
"grad_norm": 3.8329268469171702,
"learning_rate": 5e-06,
"loss": 1.1963456869125366,
"memory(GiB)": 42.29,
"step": 8,
"token_acc": 0.8275855412383573,
"train_speed(iter/s)": 0.008616
},
{
"epoch": 8.771084337349398,
"grad_norm": 3.848141778586357,
"learning_rate": 4.987961816680493e-06,
"loss": 1.1539512872695923,
"memory(GiB)": 42.29,
"step": 9,
"token_acc": 0.8452060931899642,
"train_speed(iter/s)": 0.00854
},
{
"epoch": 9.771084337349398,
"grad_norm": 2.977196631037463,
"learning_rate": 4.9519632010080765e-06,
"loss": 1.0900822877883911,
"memory(GiB)": 42.29,
"step": 10,
"token_acc": 0.8439449530665865,
"train_speed(iter/s)": 0.008583
},
{
"epoch": 10.771084337349398,
"grad_norm": 2.3240379853396145,
"learning_rate": 4.8923508393305224e-06,
"loss": 0.9584915637969971,
"memory(GiB)": 42.29,
"step": 11,
"token_acc": 0.8541569662165658,
"train_speed(iter/s)": 0.00854
},
{
"epoch": 11.771084337349398,
"grad_norm": 1.7059344045170224,
"learning_rate": 4.809698831278217e-06,
"loss": 0.9206792116165161,
"memory(GiB)": 42.29,
"step": 12,
"token_acc": 0.8550325931866718,
"train_speed(iter/s)": 0.00856
},
{
"epoch": 12.771084337349398,
"grad_norm": 1.7886326192292616,
"learning_rate": 4.704803160870888e-06,
"loss": 0.8803208470344543,
"memory(GiB)": 42.29,
"step": 13,
"token_acc": 0.8565676850786719,
"train_speed(iter/s)": 0.008514
},
{
"epoch": 13.771084337349398,
"grad_norm": 1.5286406890043707,
"learning_rate": 4.578674030756364e-06,
"loss": 0.8406718969345093,
"memory(GiB)": 42.29,
"step": 14,
"token_acc": 0.868490055655166,
"train_speed(iter/s)": 0.008553
},
{
"epoch": 14.771084337349398,
"grad_norm": 1.4093835831424686,
"learning_rate": 4.432526133406843e-06,
"loss": 0.816148042678833,
"memory(GiB)": 42.29,
"step": 15,
"token_acc": 0.8801949289867506,
"train_speed(iter/s)": 0.008514
},
{
"epoch": 15.771084337349398,
"grad_norm": 1.3680984858266587,
"learning_rate": 4.267766952966369e-06,
"loss": 0.7781298756599426,
"memory(GiB)": 42.29,
"step": 16,
"token_acc": 0.8775519188228432,
"train_speed(iter/s)": 0.008531
},
{
"epoch": 16.771084337349397,
"grad_norm": 0.6513969535166108,
"learning_rate": 4.085983210409114e-06,
"loss": 0.7328703999519348,
"memory(GiB)": 42.29,
"step": 17,
"token_acc": 0.8854784825706624,
"train_speed(iter/s)": 0.008507
},
{
"epoch": 17.771084337349397,
"grad_norm": 1.1321914535679016,
"learning_rate": 3.888925582549006e-06,
"loss": 0.7167081832885742,
"memory(GiB)": 42.29,
"step": 18,
"token_acc": 0.8828302499188575,
"train_speed(iter/s)": 0.008528
},
{
"epoch": 18.771084337349397,
"grad_norm": 1.1087830646957209,
"learning_rate": 3.6784918420649952e-06,
"loss": 0.6928962469100952,
"memory(GiB)": 42.29,
"step": 19,
"token_acc": 0.8914687444586997,
"train_speed(iter/s)": 0.008501
},
{
"epoch": 19.771084337349397,
"grad_norm": 1.0244408604059563,
"learning_rate": 3.4567085809127247e-06,
"loss": 0.6718354821205139,
"memory(GiB)": 42.29,
"step": 20,
"token_acc": 0.8931382342286962,
"train_speed(iter/s)": 0.008518
},
{
"epoch": 20.771084337349397,
"grad_norm": 0.9684342265578457,
"learning_rate": 3.225711693136156e-06,
"loss": 0.64753657579422,
"memory(GiB)": 42.29,
"step": 21,
"token_acc": 0.898327751680115,
"train_speed(iter/s)": 0.008492
},
{
"epoch": 21.771084337349397,
"grad_norm": 0.8695314329605501,
"learning_rate": 2.9877258050403214e-06,
"loss": 0.6080504655838013,
"memory(GiB)": 42.29,
"step": 22,
"token_acc": 0.8969131371141421,
"train_speed(iter/s)": 0.008511
},
{
"epoch": 22.771084337349397,
"grad_norm": 0.7610645886404945,
"learning_rate": 2.7450428508239024e-06,
"loss": 0.5871363878250122,
"memory(GiB)": 42.29,
"step": 23,
"token_acc": 0.9013859215427465,
"train_speed(iter/s)": 0.008488
},
{
"epoch": 23.771084337349397,
"grad_norm": 0.838874811475282,
"learning_rate": 2.5e-06,
"loss": 0.6137609481811523,
"memory(GiB)": 42.29,
"step": 24,
"token_acc": 0.9088375088841507,
"train_speed(iter/s)": 0.008512
},
{
"epoch": 24.771084337349397,
"grad_norm": 0.7953361813418657,
"learning_rate": 2.2549571491760985e-06,
"loss": 0.6176888942718506,
"memory(GiB)": 42.29,
"step": 25,
"token_acc": 0.9058841092793619,
"train_speed(iter/s)": 0.008488
},
{
"epoch": 25.771084337349397,
"grad_norm": 0.8068676839609372,
"learning_rate": 2.01227419495968e-06,
"loss": 0.5883712768554688,
"memory(GiB)": 42.29,
"step": 26,
"token_acc": 0.9068480043739748,
"train_speed(iter/s)": 0.008491
}
],
"logging_steps": 1,
"max_steps": 40,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 2,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 22664558936064.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}