239 lines
5.7 KiB
JSON
239 lines
5.7 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 285,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.03508771929824561,
|
|
"grad_norm": 6.543553342182306,
|
|
"learning_rate": 3.448275862068966e-06,
|
|
"loss": 1.8558,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.07017543859649122,
|
|
"grad_norm": 3.5154288610740023,
|
|
"learning_rate": 6.896551724137932e-06,
|
|
"loss": 1.4458,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.10526315789473684,
|
|
"grad_norm": 2.778543537236469,
|
|
"learning_rate": 9.999623509195724e-06,
|
|
"loss": 1.2409,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.14035087719298245,
|
|
"grad_norm": 2.5059789149316045,
|
|
"learning_rate": 9.9545131771389e-06,
|
|
"loss": 1.1153,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.17543859649122806,
|
|
"grad_norm": 2.33426924896289,
|
|
"learning_rate": 9.834882355224261e-06,
|
|
"loss": 1.0284,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.21052631578947367,
|
|
"grad_norm": 2.238033736817208,
|
|
"learning_rate": 9.64253040236608e-06,
|
|
"loss": 0.9903,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.24561403508771928,
|
|
"grad_norm": 2.172103858227053,
|
|
"learning_rate": 9.380350470977033e-06,
|
|
"loss": 0.9464,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.2807017543859649,
|
|
"grad_norm": 2.0775067726943957,
|
|
"learning_rate": 9.052285991262975e-06,
|
|
"loss": 0.9232,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.3157894736842105,
|
|
"grad_norm": 2.036467828021783,
|
|
"learning_rate": 8.663271358362064e-06,
|
|
"loss": 0.8927,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.3508771929824561,
|
|
"grad_norm": 2.092139892865783,
|
|
"learning_rate": 8.219157714448957e-06,
|
|
"loss": 0.8748,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.38596491228070173,
|
|
"grad_norm": 2.354936030454241,
|
|
"learning_rate": 7.726624942110233e-06,
|
|
"loss": 0.8712,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.42105263157894735,
|
|
"grad_norm": 2.4480552576537313,
|
|
"learning_rate": 7.193081192692639e-06,
|
|
"loss": 0.8413,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.45614035087719296,
|
|
"grad_norm": 2.225961040994733,
|
|
"learning_rate": 6.626551460811316e-06,
|
|
"loss": 0.8245,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.49122807017543857,
|
|
"grad_norm": 2.2280118933835227,
|
|
"learning_rate": 6.035556880961093e-06,
|
|
"loss": 0.7995,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.5263157894736842,
|
|
"grad_norm": 2.1092585894132694,
|
|
"learning_rate": 5.4289865617222005e-06,
|
|
"loss": 0.7919,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.5614035087719298,
|
|
"grad_norm": 2.2317869413724947,
|
|
"learning_rate": 4.815963885293206e-06,
|
|
"loss": 0.794,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.5964912280701754,
|
|
"grad_norm": 2.1555810331504976,
|
|
"learning_rate": 4.205709283330694e-06,
|
|
"loss": 0.7713,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.631578947368421,
|
|
"grad_norm": 2.0442009159494785,
|
|
"learning_rate": 3.6074015530747354e-06,
|
|
"loss": 0.7775,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 2.113365321780909,
|
|
"learning_rate": 3.0300397996947604e-06,
|
|
"loss": 0.7515,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.7017543859649122,
|
|
"grad_norm": 2.1674953587420838,
|
|
"learning_rate": 2.482308081371413e-06,
|
|
"loss": 0.765,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.7368421052631579,
|
|
"grad_norm": 2.2304080548395917,
|
|
"learning_rate": 1.972444792978373e-06,
|
|
"loss": 0.7528,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.7719298245614035,
|
|
"grad_norm": 2.40752310761597,
|
|
"learning_rate": 1.508118752955136e-06,
|
|
"loss": 0.7396,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.8070175438596491,
|
|
"grad_norm": 2.117568701663915,
|
|
"learning_rate": 1.0963138571395277e-06,
|
|
"loss": 0.7408,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.8421052631578947,
|
|
"grad_norm": 2.919503306825211,
|
|
"learning_rate": 7.43224034473674e-07,
|
|
"loss": 0.7486,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.8771929824561403,
|
|
"grad_norm": 2.125280125804679,
|
|
"learning_rate": 4.5416008454738813e-07,
|
|
"loss": 0.7359,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.9122807017543859,
|
|
"grad_norm": 2.066124288511879,
|
|
"learning_rate": 2.3346979822903071e-07,
|
|
"loss": 0.7375,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.9473684210526315,
|
|
"grad_norm": 1.9772889405004763,
|
|
"learning_rate": 8.447256284391858e-08,
|
|
"loss": 0.7245,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.9824561403508771,
|
|
"grad_norm": 1.9570228116416033,
|
|
"learning_rate": 9.409435499254105e-09,
|
|
"loss": 0.7378,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 285,
|
|
"total_flos": 1.4819175628485427e+17,
|
|
"train_loss": 0.8990087810315583,
|
|
"train_runtime": 350.8438,
|
|
"train_samples_per_second": 103.861,
|
|
"train_steps_per_second": 0.812
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 285,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 1.4819175628485427e+17,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|