{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 166,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.030211480362537766,
"grad_norm": 0.21704457700252533,
"learning_rate": 4.705882352941177e-06,
"loss": 0.284,
"step": 5
},
{
"epoch": 0.06042296072507553,
"grad_norm": 0.07700642943382263,
"learning_rate": 1.0588235294117648e-05,
"loss": 0.091,
"step": 10
},
{
"epoch": 0.09063444108761329,
"grad_norm": 1.0004132986068726,
"learning_rate": 1.647058823529412e-05,
"loss": 0.0881,
"step": 15
},
{
"epoch": 0.12084592145015106,
"grad_norm": 0.017268722876906395,
"learning_rate": 1.9991110182465032e-05,
"loss": 0.0856,
"step": 20
},
{
"epoch": 0.1510574018126888,
"grad_norm": 0.06252593547105789,
"learning_rate": 1.9891281165856876e-05,
"loss": 0.0776,
"step": 25
},
{
"epoch": 0.18126888217522658,
"grad_norm": 0.013158817775547504,
"learning_rate": 1.968162302997659e-05,
"loss": 0.0796,
"step": 30
},
{
"epoch": 0.21148036253776434,
"grad_norm": 0.054746970534324646,
"learning_rate": 1.9364463741042694e-05,
"loss": 0.0775,
"step": 35
},
{
"epoch": 0.24169184290030213,
"grad_norm": 0.027843380346894264,
"learning_rate": 1.8943324918225495e-05,
"loss": 0.0776,
"step": 40
},
{
"epoch": 0.2719033232628399,
"grad_norm": 0.09170668572187424,
"learning_rate": 1.8422882730893323e-05,
"loss": 0.0778,
"step": 45
},
{
"epoch": 0.3021148036253776,
"grad_norm": 0.049589045345783234,
"learning_rate": 1.7808915976161364e-05,
"loss": 0.0776,
"step": 50
},
{
"epoch": 0.3323262839879154,
"grad_norm": 0.057375721633434296,
"learning_rate": 1.710824191327075e-05,
"loss": 0.0787,
"step": 55
},
{
"epoch": 0.36253776435045315,
"grad_norm": 0.04218236356973648,
"learning_rate": 1.632864056726917e-05,
"loss": 0.079,
"step": 60
},
{
"epoch": 0.39274924471299094,
"grad_norm": 0.1125708743929863,
"learning_rate": 1.5478768342496872e-05,
"loss": 0.0776,
"step": 65
},
{
"epoch": 0.4229607250755287,
"grad_norm": 0.024594679474830627,
"learning_rate": 1.4568061905081874e-05,
"loss": 0.0779,
"step": 70
},
{
"epoch": 0.45317220543806647,
"grad_norm": 0.01017661951482296,
"learning_rate": 1.3606633401697557e-05,
"loss": 0.0782,
"step": 75
},
{
"epoch": 0.48338368580060426,
"grad_norm": 0.07011096179485321,
"learning_rate": 1.2605158178034656e-05,
"loss": 0.0791,
"step": 80
},
{
"epoch": 0.513595166163142,
"grad_norm": 0.01498446986079216,
"learning_rate": 1.157475624372018e-05,
"loss": 0.0792,
"step": 85
},
{
"epoch": 0.5438066465256798,
"grad_norm": 0.03184051066637039,
"learning_rate": 1.0526868799852797e-05,
"loss": 0.0779,
"step": 90
},
{
"epoch": 0.5740181268882175,
"grad_norm": 0.078987717628479,
"learning_rate": 9.473131200147205e-06,
"loss": 0.0781,
"step": 95
},
{
"epoch": 0.6042296072507553,
"grad_norm": 0.05952491611242294,
"learning_rate": 8.425243756279824e-06,
"loss": 0.0771,
"step": 100
},
{
"epoch": 0.6344410876132931,
"grad_norm": 0.014677044935524464,
"learning_rate": 7.394841821965345e-06,
"loss": 0.0771,
"step": 105
},
{
"epoch": 0.6646525679758308,
"grad_norm": 0.03106486238539219,
"learning_rate": 6.3933665983024465e-06,
"loss": 0.0776,
"step": 110
},
{
"epoch": 0.6948640483383686,
"grad_norm": 0.03548077121376991,
"learning_rate": 5.431938094918132e-06,
"loss": 0.0767,
"step": 115
},
{
"epoch": 0.7250755287009063,
"grad_norm": 0.02386642061173916,
"learning_rate": 4.5212316575031325e-06,
"loss": 0.0778,
"step": 120
},
{
"epoch": 0.7552870090634441,
"grad_norm": 0.03368431329727173,
"learning_rate": 3.6713594327308343e-06,
"loss": 0.0776,
"step": 125
},
{
"epoch": 0.7854984894259819,
"grad_norm": 0.016041960567235947,
"learning_rate": 2.891758086729253e-06,
"loss": 0.0769,
"step": 130
},
{
"epoch": 0.8157099697885196,
"grad_norm": 0.03274780884385109,
"learning_rate": 2.19108402383864e-06,
"loss": 0.0768,
"step": 135
},
{
"epoch": 0.8459214501510574,
"grad_norm": 0.03985007107257843,
"learning_rate": 1.5771172691066793e-06,
"loss": 0.0765,
"step": 140
},
{
"epoch": 0.8761329305135952,
"grad_norm": 0.02575680799782276,
"learning_rate": 1.0566750817745076e-06,
"loss": 0.077,
"step": 145
},
{
"epoch": 0.9063444108761329,
"grad_norm": 0.04101819917559624,
"learning_rate": 6.355362589573078e-07,
"loss": 0.0758,
"step": 150
},
{
"epoch": 0.9365558912386707,
"grad_norm": 0.06996775418519974,
"learning_rate": 3.1837697002341293e-07,
"loss": 0.0775,
"step": 155
},
{
"epoch": 0.9667673716012085,
"grad_norm": 0.007283793296664953,
"learning_rate": 1.0871883414312778e-07,
"loss": 0.0758,
"step": 160
},
{
"epoch": 0.9969788519637462,
"grad_norm": 0.017074227333068848,
"learning_rate": 8.889817534969425e-09,
"loss": 0.0768,
"step": 165
},
{
"epoch": 1.0,
"step": 166,
"total_flos": 2.8024067250035098e+17,
"train_loss": 0.08477670932749667,
"train_runtime": 1255.2183,
"train_samples_per_second": 16.86,
"train_steps_per_second": 0.132
}
],
"logging_steps": 5,
"max_steps": 166,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.8024067250035098e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}