Files
Llama3.2-3B_Paper_Impact_pa…/trainer_state.json

240 lines
5.7 KiB
JSON
Raw Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 144,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.034904013961605584,
"grad_norm": 0.2724517285823822,
"learning_rate": 5.333333333333334e-06,
"loss": 0.0934,
"step": 5
},
{
"epoch": 0.06980802792321117,
"grad_norm": 0.3414818048477173,
"learning_rate": 1.2e-05,
"loss": 0.0773,
"step": 10
},
{
"epoch": 0.10471204188481675,
"grad_norm": 0.07792749255895615,
"learning_rate": 1.866666666666667e-05,
"loss": 0.0711,
"step": 15
},
{
"epoch": 0.13961605584642234,
"grad_norm": 0.0294520054012537,
"learning_rate": 1.995259033893236e-05,
"loss": 0.0736,
"step": 20
},
{
"epoch": 0.17452006980802792,
"grad_norm": 0.013957683928310871,
"learning_rate": 1.9760758775559275e-05,
"loss": 0.0697,
"step": 25
},
{
"epoch": 0.2094240837696335,
"grad_norm": 0.065118707716465,
"learning_rate": 1.9424380828337146e-05,
"loss": 0.0699,
"step": 30
},
{
"epoch": 0.2443280977312391,
"grad_norm": 0.021100476384162903,
"learning_rate": 1.894843789440892e-05,
"loss": 0.0697,
"step": 35
},
{
"epoch": 0.2792321116928447,
"grad_norm": 0.026198429986834526,
"learning_rate": 1.833997817889878e-05,
"loss": 0.0695,
"step": 40
},
{
"epoch": 0.31413612565445026,
"grad_norm": 0.07283973693847656,
"learning_rate": 1.760801231854278e-05,
"loss": 0.07,
"step": 45
},
{
"epoch": 0.34904013961605584,
"grad_norm": 0.04578598588705063,
"learning_rate": 1.676337994380903e-05,
"loss": 0.0701,
"step": 50
},
{
"epoch": 0.38394415357766143,
"grad_norm": 0.10095158964395523,
"learning_rate": 1.581858915557953e-05,
"loss": 0.0698,
"step": 55
},
{
"epoch": 0.418848167539267,
"grad_norm": 0.028562646359205246,
"learning_rate": 1.4787631293572094e-05,
"loss": 0.0699,
"step": 60
},
{
"epoch": 0.4537521815008726,
"grad_norm": 0.02697976492345333,
"learning_rate": 1.368577373958362e-05,
"loss": 0.0695,
"step": 65
},
{
"epoch": 0.4886561954624782,
"grad_norm": 0.0685800239443779,
"learning_rate": 1.2529333823916807e-05,
"loss": 0.0696,
"step": 70
},
{
"epoch": 0.5235602094240838,
"grad_norm": 0.13133621215820312,
"learning_rate": 1.133543718319398e-05,
"loss": 0.0713,
"step": 75
},
{
"epoch": 0.5584642233856894,
"grad_norm": 0.017290577292442322,
"learning_rate": 1.0121764148019977e-05,
"loss": 0.0696,
"step": 80
},
{
"epoch": 0.5933682373472949,
"grad_norm": 0.05858515202999115,
"learning_rate": 8.906287916221259e-06,
"loss": 0.0696,
"step": 85
},
{
"epoch": 0.6282722513089005,
"grad_norm": 0.07648473978042603,
"learning_rate": 7.707008389035102e-06,
"loss": 0.0699,
"step": 90
},
{
"epoch": 0.6631762652705061,
"grad_norm": 0.052451424300670624,
"learning_rate": 6.5416856118498874e-06,
"loss": 0.0697,
"step": 95
},
{
"epoch": 0.6980802792321117,
"grad_norm": 0.03691520541906357,
"learning_rate": 5.427576766953615e-06,
"loss": 0.0697,
"step": 100
},
{
"epoch": 0.7329842931937173,
"grad_norm": 0.003152969991788268,
"learning_rate": 4.381180613146396e-06,
"loss": 0.0695,
"step": 105
},
{
"epoch": 0.7678883071553229,
"grad_norm": 0.017924955114722252,
"learning_rate": 3.4179931567925216e-06,
"loss": 0.0694,
"step": 110
},
{
"epoch": 0.8027923211169284,
"grad_norm": 0.04167533293366432,
"learning_rate": 2.5522781725621814e-06,
"loss": 0.0694,
"step": 115
},
{
"epoch": 0.837696335078534,
"grad_norm": 0.03422262519598007,
"learning_rate": 1.7968559722048906e-06,
"loss": 0.0692,
"step": 120
},
{
"epoch": 0.8726003490401396,
"grad_norm": 0.0365980863571167,
"learning_rate": 1.1629135494628097e-06,
"loss": 0.0696,
"step": 125
},
{
"epoch": 0.9075043630017452,
"grad_norm": 0.032294586300849915,
"learning_rate": 6.598389126745209e-07,
"loss": 0.0695,
"step": 130
},
{
"epoch": 0.9424083769633508,
"grad_norm": 0.001334571628831327,
"learning_rate": 2.9508205842594727e-07,
"loss": 0.0695,
"step": 135
},
{
"epoch": 0.9773123909249564,
"grad_norm": 0.05335932970046997,
"learning_rate": 7.404464507973608e-08,
"loss": 0.0693,
"step": 140
},
{
"epoch": 1.0,
"step": 144,
"total_flos": 2.4545020729727386e+17,
"train_loss": 0.07097241137590674,
"train_runtime": 1113.1898,
"train_samples_per_second": 16.457,
"train_steps_per_second": 0.129
}
],
"logging_steps": 5,
"max_steps": 144,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.4545020729727386e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}