275 lines
6.6 KiB
JSON
275 lines
6.6 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 166,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.030211480362537766,
|
|
"grad_norm": 0.21704457700252533,
|
|
"learning_rate": 4.705882352941177e-06,
|
|
"loss": 0.284,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.06042296072507553,
|
|
"grad_norm": 0.07700642943382263,
|
|
"learning_rate": 1.0588235294117648e-05,
|
|
"loss": 0.091,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.09063444108761329,
|
|
"grad_norm": 1.0004132986068726,
|
|
"learning_rate": 1.647058823529412e-05,
|
|
"loss": 0.0881,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.12084592145015106,
|
|
"grad_norm": 0.017268722876906395,
|
|
"learning_rate": 1.9991110182465032e-05,
|
|
"loss": 0.0856,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.1510574018126888,
|
|
"grad_norm": 0.06252593547105789,
|
|
"learning_rate": 1.9891281165856876e-05,
|
|
"loss": 0.0776,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.18126888217522658,
|
|
"grad_norm": 0.013158817775547504,
|
|
"learning_rate": 1.968162302997659e-05,
|
|
"loss": 0.0796,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.21148036253776434,
|
|
"grad_norm": 0.054746970534324646,
|
|
"learning_rate": 1.9364463741042694e-05,
|
|
"loss": 0.0775,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.24169184290030213,
|
|
"grad_norm": 0.027843380346894264,
|
|
"learning_rate": 1.8943324918225495e-05,
|
|
"loss": 0.0776,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.2719033232628399,
|
|
"grad_norm": 0.09170668572187424,
|
|
"learning_rate": 1.8422882730893323e-05,
|
|
"loss": 0.0778,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.3021148036253776,
|
|
"grad_norm": 0.049589045345783234,
|
|
"learning_rate": 1.7808915976161364e-05,
|
|
"loss": 0.0776,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.3323262839879154,
|
|
"grad_norm": 0.057375721633434296,
|
|
"learning_rate": 1.710824191327075e-05,
|
|
"loss": 0.0787,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.36253776435045315,
|
|
"grad_norm": 0.04218236356973648,
|
|
"learning_rate": 1.632864056726917e-05,
|
|
"loss": 0.079,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.39274924471299094,
|
|
"grad_norm": 0.1125708743929863,
|
|
"learning_rate": 1.5478768342496872e-05,
|
|
"loss": 0.0776,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.4229607250755287,
|
|
"grad_norm": 0.024594679474830627,
|
|
"learning_rate": 1.4568061905081874e-05,
|
|
"loss": 0.0779,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.45317220543806647,
|
|
"grad_norm": 0.01017661951482296,
|
|
"learning_rate": 1.3606633401697557e-05,
|
|
"loss": 0.0782,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.48338368580060426,
|
|
"grad_norm": 0.07011096179485321,
|
|
"learning_rate": 1.2605158178034656e-05,
|
|
"loss": 0.0791,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.513595166163142,
|
|
"grad_norm": 0.01498446986079216,
|
|
"learning_rate": 1.157475624372018e-05,
|
|
"loss": 0.0792,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.5438066465256798,
|
|
"grad_norm": 0.03184051066637039,
|
|
"learning_rate": 1.0526868799852797e-05,
|
|
"loss": 0.0779,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.5740181268882175,
|
|
"grad_norm": 0.078987717628479,
|
|
"learning_rate": 9.473131200147205e-06,
|
|
"loss": 0.0781,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.6042296072507553,
|
|
"grad_norm": 0.05952491611242294,
|
|
"learning_rate": 8.425243756279824e-06,
|
|
"loss": 0.0771,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.6344410876132931,
|
|
"grad_norm": 0.014677044935524464,
|
|
"learning_rate": 7.394841821965345e-06,
|
|
"loss": 0.0771,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.6646525679758308,
|
|
"grad_norm": 0.03106486238539219,
|
|
"learning_rate": 6.3933665983024465e-06,
|
|
"loss": 0.0776,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.6948640483383686,
|
|
"grad_norm": 0.03548077121376991,
|
|
"learning_rate": 5.431938094918132e-06,
|
|
"loss": 0.0767,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.7250755287009063,
|
|
"grad_norm": 0.02386642061173916,
|
|
"learning_rate": 4.5212316575031325e-06,
|
|
"loss": 0.0778,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.7552870090634441,
|
|
"grad_norm": 0.03368431329727173,
|
|
"learning_rate": 3.6713594327308343e-06,
|
|
"loss": 0.0776,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.7854984894259819,
|
|
"grad_norm": 0.016041960567235947,
|
|
"learning_rate": 2.891758086729253e-06,
|
|
"loss": 0.0769,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.8157099697885196,
|
|
"grad_norm": 0.03274780884385109,
|
|
"learning_rate": 2.19108402383864e-06,
|
|
"loss": 0.0768,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.8459214501510574,
|
|
"grad_norm": 0.03985007107257843,
|
|
"learning_rate": 1.5771172691066793e-06,
|
|
"loss": 0.0765,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.8761329305135952,
|
|
"grad_norm": 0.02575680799782276,
|
|
"learning_rate": 1.0566750817745076e-06,
|
|
"loss": 0.077,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.9063444108761329,
|
|
"grad_norm": 0.04101819917559624,
|
|
"learning_rate": 6.355362589573078e-07,
|
|
"loss": 0.0758,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.9365558912386707,
|
|
"grad_norm": 0.06996775418519974,
|
|
"learning_rate": 3.1837697002341293e-07,
|
|
"loss": 0.0775,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.9667673716012085,
|
|
"grad_norm": 0.007283793296664953,
|
|
"learning_rate": 1.0871883414312778e-07,
|
|
"loss": 0.0758,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.9969788519637462,
|
|
"grad_norm": 0.017074227333068848,
|
|
"learning_rate": 8.889817534969425e-09,
|
|
"loss": 0.0768,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 166,
|
|
"total_flos": 2.8024067250035098e+17,
|
|
"train_loss": 0.08477670932749667,
|
|
"train_runtime": 1255.2183,
|
|
"train_samples_per_second": 16.86,
|
|
"train_steps_per_second": 0.132
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 166,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.8024067250035098e+17,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|