{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 144,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.034904013961605584,
      "grad_norm": 0.2724517285823822,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.0934,
      "step": 5
    },
    {
      "epoch": 0.06980802792321117,
      "grad_norm": 0.3414818048477173,
      "learning_rate": 1.2e-05,
      "loss": 0.0773,
      "step": 10
    },
    {
      "epoch": 0.10471204188481675,
      "grad_norm": 0.07792749255895615,
      "learning_rate": 1.866666666666667e-05,
      "loss": 0.0711,
      "step": 15
    },
    {
      "epoch": 0.13961605584642234,
      "grad_norm": 0.0294520054012537,
      "learning_rate": 1.995259033893236e-05,
      "loss": 0.0736,
      "step": 20
    },
    {
      "epoch": 0.17452006980802792,
      "grad_norm": 0.013957683928310871,
      "learning_rate": 1.9760758775559275e-05,
      "loss": 0.0697,
      "step": 25
    },
    {
      "epoch": 0.2094240837696335,
      "grad_norm": 0.065118707716465,
      "learning_rate": 1.9424380828337146e-05,
      "loss": 0.0699,
      "step": 30
    },
    {
      "epoch": 0.2443280977312391,
      "grad_norm": 0.021100476384162903,
      "learning_rate": 1.894843789440892e-05,
      "loss": 0.0697,
      "step": 35
    },
    {
      "epoch": 0.2792321116928447,
      "grad_norm": 0.026198429986834526,
      "learning_rate": 1.833997817889878e-05,
      "loss": 0.0695,
      "step": 40
    },
    {
      "epoch": 0.31413612565445026,
      "grad_norm": 0.07283973693847656,
      "learning_rate": 1.760801231854278e-05,
      "loss": 0.07,
      "step": 45
    },
    {
      "epoch": 0.34904013961605584,
      "grad_norm": 0.04578598588705063,
      "learning_rate": 1.676337994380903e-05,
      "loss": 0.0701,
      "step": 50
    },
    {
      "epoch": 0.38394415357766143,
      "grad_norm": 0.10095158964395523,
      "learning_rate": 1.581858915557953e-05,
      "loss": 0.0698,
      "step": 55
    },
    {
      "epoch": 0.418848167539267,
      "grad_norm": 0.028562646359205246,
      "learning_rate": 1.4787631293572094e-05,
      "loss": 0.0699,
      "step": 60
    },
    {
      "epoch": 0.4537521815008726,
      "grad_norm": 0.02697976492345333,
      "learning_rate": 1.368577373958362e-05,
      "loss": 0.0695,
      "step": 65
    },
    {
      "epoch": 0.4886561954624782,
      "grad_norm": 0.0685800239443779,
      "learning_rate": 1.2529333823916807e-05,
      "loss": 0.0696,
      "step": 70
    },
    {
      "epoch": 0.5235602094240838,
      "grad_norm": 0.13133621215820312,
      "learning_rate": 1.133543718319398e-05,
      "loss": 0.0713,
      "step": 75
    },
    {
      "epoch": 0.5584642233856894,
      "grad_norm": 0.017290577292442322,
      "learning_rate": 1.0121764148019977e-05,
      "loss": 0.0696,
      "step": 80
    },
    {
      "epoch": 0.5933682373472949,
      "grad_norm": 0.05858515202999115,
      "learning_rate": 8.906287916221259e-06,
      "loss": 0.0696,
      "step": 85
    },
    {
      "epoch": 0.6282722513089005,
      "grad_norm": 0.07648473978042603,
      "learning_rate": 7.707008389035102e-06,
      "loss": 0.0699,
      "step": 90
    },
    {
      "epoch": 0.6631762652705061,
      "grad_norm": 0.052451424300670624,
      "learning_rate": 6.5416856118498874e-06,
      "loss": 0.0697,
      "step": 95
    },
    {
      "epoch": 0.6980802792321117,
      "grad_norm": 0.03691520541906357,
      "learning_rate": 5.427576766953615e-06,
      "loss": 0.0697,
      "step": 100
    },
    {
      "epoch": 0.7329842931937173,
      "grad_norm": 0.003152969991788268,
      "learning_rate": 4.381180613146396e-06,
      "loss": 0.0695,
      "step": 105
    },
    {
      "epoch": 0.7678883071553229,
      "grad_norm": 0.017924955114722252,
      "learning_rate": 3.4179931567925216e-06,
      "loss": 0.0694,
      "step": 110
    },
    {
      "epoch": 0.8027923211169284,
      "grad_norm": 0.04167533293366432,
      "learning_rate": 2.5522781725621814e-06,
      "loss": 0.0694,
      "step": 115
    },
    {
      "epoch": 0.837696335078534,
      "grad_norm": 0.03422262519598007,
      "learning_rate": 1.7968559722048906e-06,
      "loss": 0.0692,
      "step": 120
    },
    {
      "epoch": 0.8726003490401396,
      "grad_norm": 0.0365980863571167,
      "learning_rate": 1.1629135494628097e-06,
      "loss": 0.0696,
      "step": 125
    },
    {
      "epoch": 0.9075043630017452,
      "grad_norm": 0.032294586300849915,
      "learning_rate": 6.598389126745209e-07,
      "loss": 0.0695,
      "step": 130
    },
    {
      "epoch": 0.9424083769633508,
      "grad_norm": 0.001334571628831327,
      "learning_rate": 2.9508205842594727e-07,
      "loss": 0.0695,
      "step": 135
    },
    {
      "epoch": 0.9773123909249564,
      "grad_norm": 0.05335932970046997,
      "learning_rate": 7.404464507973608e-08,
      "loss": 0.0693,
      "step": 140
    },
    {
      "epoch": 1.0,
      "step": 144,
      "total_flos": 2.4545020729727386e+17,
      "train_loss": 0.07097241137590674,
      "train_runtime": 1113.1898,
      "train_samples_per_second": 16.457,
      "train_steps_per_second": 0.129
    }
  ],
  "logging_steps": 5,
  "max_steps": 144,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.4545020729727386e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}