{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.3491271820448878,
  "eval_steps": 500,
  "global_step": 350,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0113314447592068,
      "grad_norm": 4.46875,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.5448,
      "step": 10
    },
    {
      "epoch": 0.0226628895184136,
      "grad_norm": 0.74609375,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.4594,
      "step": 20
    },
    {
      "epoch": 0.0339943342776204,
      "grad_norm": 0.380859375,
      "learning_rate": 1.16e-05,
      "loss": 0.3953,
      "step": 30
    },
    {
      "epoch": 0.0453257790368272,
      "grad_norm": 0.31640625,
      "learning_rate": 1.5600000000000003e-05,
      "loss": 0.3695,
      "step": 40
    },
    {
      "epoch": 0.056657223796033995,
      "grad_norm": 0.2314453125,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 0.3366,
      "step": 50
    },
    {
      "epoch": 0.0679886685552408,
      "grad_norm": 0.21484375,
      "learning_rate": 1.9998642592088543e-05,
      "loss": 0.3204,
      "step": 60
    },
    {
      "epoch": 0.07932011331444759,
      "grad_norm": 0.2060546875,
      "learning_rate": 1.9993950790937545e-05,
      "loss": 0.3231,
      "step": 70
    },
    {
      "epoch": 0.0906515580736544,
      "grad_norm": 0.2236328125,
      "learning_rate": 1.9985909410557404e-05,
      "loss": 0.3127,
      "step": 80
    },
    {
      "epoch": 0.10198300283286119,
      "grad_norm": 0.21875,
      "learning_rate": 1.9974521146102535e-05,
      "loss": 0.3033,
      "step": 90
    },
    {
      "epoch": 0.11331444759206799,
      "grad_norm": 0.267578125,
      "learning_rate": 1.9959789814471278e-05,
      "loss": 0.2907,
      "step": 100
    },
    {
      "epoch": 0.12464589235127478,
      "grad_norm": 0.24609375,
      "learning_rate": 1.9941720353026582e-05,
      "loss": 0.2941,
      "step": 110
    },
    {
      "epoch": 0.1359773371104816,
      "grad_norm": 0.2236328125,
      "learning_rate": 1.9920318817941234e-05,
      "loss": 0.2844,
      "step": 120
    },
    {
      "epoch": 0.14730878186968838,
      "grad_norm": 0.23828125,
      "learning_rate": 1.9895592382168036e-05,
      "loss": 0.2801,
      "step": 130
    },
    {
      "epoch": 0.15864022662889518,
      "grad_norm": 0.2294921875,
      "learning_rate": 1.986754933303574e-05,
      "loss": 0.2805,
      "step": 140
    },
    {
      "epoch": 0.16997167138810199,
      "grad_norm": 0.2177734375,
      "learning_rate": 1.983619906947144e-05,
      "loss": 0.2706,
      "step": 150
    },
    {
      "epoch": 0.1813031161473088,
      "grad_norm": 0.240234375,
      "learning_rate": 1.980155209885043e-05,
      "loss": 0.2756,
      "step": 160
    },
    {
      "epoch": 0.19263456090651557,
      "grad_norm": 0.232421875,
      "learning_rate": 1.9763620033474552e-05,
      "loss": 0.2713,
      "step": 170
    },
    {
      "epoch": 0.20396600566572237,
      "grad_norm": 0.2470703125,
      "learning_rate": 1.9722415586680204e-05,
      "loss": 0.2675,
      "step": 180
    },
    {
      "epoch": 0.21529745042492918,
      "grad_norm": 0.236328125,
      "learning_rate": 1.9677952568577316e-05,
      "loss": 0.2574,
      "step": 190
    },
    {
      "epoch": 0.22662889518413598,
      "grad_norm": 0.259765625,
      "learning_rate": 1.9630245881420764e-05,
      "loss": 0.2636,
      "step": 200
    },
    {
      "epoch": 0.23796033994334279,
      "grad_norm": 0.26953125,
      "learning_rate": 1.957931151461572e-05,
      "loss": 0.2614,
      "step": 210
    },
    {
      "epoch": 0.24929178470254956,
      "grad_norm": 0.271484375,
      "learning_rate": 1.9525166539358608e-05,
      "loss": 0.2548,
      "step": 220
    },
    {
      "epoch": 0.26062322946175637,
      "grad_norm": 0.2412109375,
      "learning_rate": 1.946782910291554e-05,
      "loss": 0.2532,
      "step": 230
    },
    {
      "epoch": 0.2719546742209632,
      "grad_norm": 0.271484375,
      "learning_rate": 1.9407318422540057e-05,
      "loss": 0.2545,
      "step": 240
    },
    {
      "epoch": 0.28328611898017,
      "grad_norm": 0.267578125,
      "learning_rate": 1.9343654779032244e-05,
      "loss": 0.251,
      "step": 250
    },
    {
      "epoch": 0.2593516209476309,
      "grad_norm": 0.29296875,
      "learning_rate": 1.944186059309318e-05,
      "loss": 0.2362,
      "step": 260
    },
    {
      "epoch": 0.26932668329177056,
      "grad_norm": 0.31640625,
      "learning_rate": 1.93877370638343e-05,
      "loss": 0.2377,
      "step": 270
    },
    {
      "epoch": 0.2793017456359102,
      "grad_norm": 0.3125,
      "learning_rate": 1.9331191872025963e-05,
      "loss": 0.2346,
      "step": 280
    },
    {
      "epoch": 0.2892768079800499,
      "grad_norm": 0.400390625,
      "learning_rate": 1.927223960407727e-05,
      "loss": 0.233,
      "step": 290
    },
    {
      "epoch": 0.29925187032418954,
      "grad_norm": 0.337890625,
      "learning_rate": 1.921089546732717e-05,
      "loss": 0.23,
      "step": 300
    },
    {
      "epoch": 0.3092269326683292,
      "grad_norm": 0.31640625,
      "learning_rate": 1.9147175286121577e-05,
      "loss": 0.2345,
      "step": 310
    },
    {
      "epoch": 0.3192019950124688,
      "grad_norm": 0.298828125,
      "learning_rate": 1.90810954977313e-05,
      "loss": 0.2325,
      "step": 320
    },
    {
      "epoch": 0.32917705735660846,
      "grad_norm": 0.3359375,
      "learning_rate": 1.9012673148111908e-05,
      "loss": 0.2322,
      "step": 330
    },
    {
      "epoch": 0.33915211970074816,
      "grad_norm": 0.298828125,
      "learning_rate": 1.8941925887506527e-05,
      "loss": 0.2257,
      "step": 340
    },
    {
      "epoch": 0.3491271820448878,
      "grad_norm": 0.31640625,
      "learning_rate": 1.8868871965892794e-05,
      "loss": 0.2273,
      "step": 350
    }
  ],
  "logging_steps": 10,
  "max_steps": 2006,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.749022813082747e+18,
  "train_batch_size": 25,
  "trial_name": null,
  "trial_params": null
}