Files
swesmith-unified-10000__Qwe…/trainer_state.json

605 lines
17 KiB
JSON
Raw Permalink Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 259,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13513513513513514,
"grad_norm": 5.259959182216627,
"learning_rate": 6.153846153846155e-06,
"loss": 0.3116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09571799635887146,
"step": 5,
"valid_targets_mean": 6433.7,
"valid_targets_min": 577
},
{
"epoch": 0.2702702702702703,
"grad_norm": 1.3423309952029923,
"learning_rate": 1.3846153846153847e-05,
"loss": 0.2587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07112158834934235,
"step": 10,
"valid_targets_mean": 7327.8,
"valid_targets_min": 413
},
{
"epoch": 0.40540540540540543,
"grad_norm": 0.503106602146357,
"learning_rate": 2.153846153846154e-05,
"loss": 0.2116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06387270987033844,
"step": 15,
"valid_targets_mean": 7716.1,
"valid_targets_min": 701
},
{
"epoch": 0.5405405405405406,
"grad_norm": 0.43730375431686763,
"learning_rate": 2.923076923076923e-05,
"loss": 0.188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.057055432349443436,
"step": 20,
"valid_targets_mean": 6165.1,
"valid_targets_min": 2120
},
{
"epoch": 0.6756756756756757,
"grad_norm": 0.23457297766166457,
"learning_rate": 3.692307692307693e-05,
"loss": 0.1511,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04511473327875137,
"step": 25,
"valid_targets_mean": 6698.5,
"valid_targets_min": 579
},
{
"epoch": 0.8108108108108109,
"grad_norm": 0.22580079812560477,
"learning_rate": 3.998364045590232e-05,
"loss": 0.1382,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04277999699115753,
"step": 30,
"valid_targets_mean": 5174.9,
"valid_targets_min": 340
},
{
"epoch": 0.9459459459459459,
"grad_norm": 0.15302847392142027,
"learning_rate": 3.988376236895231e-05,
"loss": 0.1277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03875000774860382,
"step": 35,
"valid_targets_mean": 6631.1,
"valid_targets_min": 431
},
{
"epoch": 1.0810810810810811,
"grad_norm": 0.14846277384715484,
"learning_rate": 3.969354804762473e-05,
"loss": 0.1171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.035136736929416656,
"step": 40,
"valid_targets_mean": 5226.1,
"valid_targets_min": 427
},
{
"epoch": 1.2162162162162162,
"grad_norm": 0.17362666626644624,
"learning_rate": 3.9413861676735034e-05,
"loss": 0.1152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.035750940442085266,
"step": 45,
"valid_targets_mean": 6524.1,
"valid_targets_min": 497
},
{
"epoch": 1.3513513513513513,
"grad_norm": 0.13178939301683926,
"learning_rate": 3.9045973931977495e-05,
"loss": 0.1102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.036801472306251526,
"step": 50,
"valid_targets_mean": 6943.8,
"valid_targets_min": 2015
},
{
"epoch": 1.4864864864864864,
"grad_norm": 0.18185896796674594,
"learning_rate": 3.8591556206970594e-05,
"loss": 0.106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03439167141914368,
"step": 55,
"valid_targets_mean": 5706.4,
"valid_targets_min": 453
},
{
"epoch": 1.6216216216216215,
"grad_norm": 0.1385453505062919,
"learning_rate": 3.805267301975424e-05,
"loss": 0.1059,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.035251468420028687,
"step": 60,
"valid_targets_mean": 6418.2,
"valid_targets_min": 1363
},
{
"epoch": 1.7567567567567568,
"grad_norm": 0.13076540307986736,
"learning_rate": 3.743177263323758e-05,
"loss": 0.1042,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03142703324556351,
"step": 65,
"valid_targets_mean": 5221.7,
"valid_targets_min": 577
},
{
"epoch": 1.8918918918918919,
"grad_norm": 0.13340506373409342,
"learning_rate": 3.673167593221097e-05,
"loss": 0.0995,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.034315336495637894,
"step": 70,
"valid_targets_mean": 6568.5,
"valid_targets_min": 723
},
{
"epoch": 2.027027027027027,
"grad_norm": 0.1410479342684041,
"learning_rate": 3.5955563607456025e-05,
"loss": 0.0975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.030078843235969543,
"step": 75,
"valid_targets_mean": 5617.2,
"valid_targets_min": 387
},
{
"epoch": 2.1621621621621623,
"grad_norm": 0.14495862504665655,
"learning_rate": 3.510696170517927e-05,
"loss": 0.0949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.028750576078891754,
"step": 80,
"valid_targets_mean": 5150.5,
"valid_targets_min": 427
},
{
"epoch": 2.2972972972972974,
"grad_norm": 0.14212400520351012,
"learning_rate": 3.418972560742133e-05,
"loss": 0.0905,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0317218042910099,
"step": 85,
"valid_targets_mean": 6628.6,
"valid_targets_min": 883
},
{
"epoch": 2.4324324324324325,
"grad_norm": 0.1337772062885725,
"learning_rate": 3.3208022516222195e-05,
"loss": 0.091,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0257179643958807,
"step": 90,
"valid_targets_mean": 6095.6,
"valid_targets_min": 436
},
{
"epoch": 2.5675675675675675,
"grad_norm": 0.17276086488015027,
"learning_rate": 3.2166312521120775e-05,
"loss": 0.0876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.030876662582159042,
"step": 95,
"valid_targets_mean": 6510.9,
"valid_targets_min": 1635
},
{
"epoch": 2.7027027027027026,
"grad_norm": 0.1480157679597937,
"learning_rate": 3.106932833600314e-05,
"loss": 0.0865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02585354819893837,
"step": 100,
"valid_targets_mean": 6586.1,
"valid_targets_min": 1657
},
{
"epoch": 2.8378378378378377,
"grad_norm": 0.15909100754186584,
"learning_rate": 2.9922053797359406e-05,
"loss": 0.0901,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.027233216911554337,
"step": 105,
"valid_targets_mean": 5279.3,
"valid_targets_min": 393
},
{
"epoch": 2.972972972972973,
"grad_norm": 0.18306212162162813,
"learning_rate": 2.8729701221636294e-05,
"loss": 0.0865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02899256721138954,
"step": 110,
"valid_targets_mean": 5776.7,
"valid_targets_min": 1181
},
{
"epoch": 3.108108108108108,
"grad_norm": 0.15654440385954815,
"learning_rate": 2.74976877245558e-05,
"loss": 0.0843,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026833169162273407,
"step": 115,
"valid_targets_mean": 5595.8,
"valid_targets_min": 395
},
{
"epoch": 3.2432432432432434,
"grad_norm": 0.13190386927690592,
"learning_rate": 2.6231610609986442e-05,
"loss": 0.0776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024204321205615997,
"step": 120,
"valid_targets_mean": 8543.6,
"valid_targets_min": 1525
},
{
"epoch": 3.3783783783783785,
"grad_norm": 0.15764713783801354,
"learning_rate": 2.493722194018082e-05,
"loss": 0.0813,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024981103837490082,
"step": 125,
"valid_targets_mean": 5946.0,
"valid_targets_min": 1137
},
{
"epoch": 3.5135135135135136,
"grad_norm": 0.1529541859974241,
"learning_rate": 2.362040240291227e-05,
"loss": 0.0815,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026058465242385864,
"step": 130,
"valid_targets_mean": 6072.2,
"valid_targets_min": 802
},
{
"epoch": 3.6486486486486487,
"grad_norm": 0.33060538437630244,
"learning_rate": 2.228713459423804e-05,
"loss": 0.0778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02479572780430317,
"step": 135,
"valid_targets_mean": 5208.4,
"valid_targets_min": 497
},
{
"epoch": 3.7837837837837838,
"grad_norm": 0.15640399728656484,
"learning_rate": 2.094347583827102e-05,
"loss": 0.0817,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026677442714571953,
"step": 140,
"valid_targets_mean": 5517.4,
"valid_targets_min": 393
},
{
"epoch": 3.918918918918919,
"grad_norm": 0.1517001071529229,
"learning_rate": 1.9595530667445775e-05,
"loss": 0.076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026775188744068146,
"step": 145,
"valid_targets_mean": 6216.9,
"valid_targets_min": 820
},
{
"epoch": 4.054054054054054,
"grad_norm": 0.1520783489774219,
"learning_rate": 1.824942308830696e-05,
"loss": 0.0787,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02473868615925312,
"step": 150,
"valid_targets_mean": 5801.8,
"valid_targets_min": 692
},
{
"epoch": 4.1891891891891895,
"grad_norm": 0.14931092491442138,
"learning_rate": 1.691126875882263e-05,
"loss": 0.0752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.021454155445098877,
"step": 155,
"valid_targets_mean": 6150.3,
"valid_targets_min": 577
},
{
"epoch": 4.324324324324325,
"grad_norm": 0.15680952631799258,
"learning_rate": 1.5587147203626934e-05,
"loss": 0.0712,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022702787071466446,
"step": 160,
"valid_targets_mean": 6739.9,
"valid_targets_min": 1051
},
{
"epoch": 4.45945945945946,
"grad_norm": 0.15760466083796523,
"learning_rate": 1.4283074193424379e-05,
"loss": 0.0713,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022617951035499573,
"step": 165,
"valid_targets_mean": 6288.2,
"valid_targets_min": 579
},
{
"epoch": 4.594594594594595,
"grad_norm": 0.16238993129940618,
"learning_rate": 1.3004974414041987e-05,
"loss": 0.0738,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02532375603914261,
"step": 170,
"valid_targets_mean": 5912.8,
"valid_targets_min": 1757
},
{
"epoch": 4.72972972972973,
"grad_norm": 0.1636253567113291,
"learning_rate": 1.1758654549299735e-05,
"loss": 0.0711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022857841104269028,
"step": 175,
"valid_targets_mean": 5464.2,
"valid_targets_min": 577
},
{
"epoch": 4.864864864864865,
"grad_norm": 0.18989576780948067,
"learning_rate": 1.0549776899989686e-05,
"loss": 0.0727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.021705035120248795,
"step": 180,
"valid_targets_mean": 7371.7,
"valid_targets_min": 355
},
{
"epoch": 5.0,
"grad_norm": 0.17912554773879225,
"learning_rate": 9.3838336588184e-06,
"loss": 0.074,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.029517250135540962,
"step": 185,
"valid_targets_mean": 5847.1,
"valid_targets_min": 342
},
{
"epoch": 5.135135135135135,
"grad_norm": 0.17463959150551875,
"learning_rate": 8.266121958187246e-06,
"loss": 0.0709,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024629643186926842,
"step": 190,
"valid_targets_mean": 7010.6,
"valid_targets_min": 1067
},
{
"epoch": 5.27027027027027,
"grad_norm": 0.16012384004377175,
"learning_rate": 7.201719804173797e-06,
"loss": 0.0683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02399550750851631,
"step": 195,
"valid_targets_mean": 6869.6,
"valid_targets_min": 405
},
{
"epoch": 5.405405405405405,
"grad_norm": 0.27301642675588644,
"learning_rate": 6.1954630060516005e-06,
"loss": 0.0676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023653771728277206,
"step": 200,
"valid_targets_mean": 4831.9,
"valid_targets_min": 1161
},
{
"epoch": 5.54054054054054,
"grad_norm": 0.1781041058075561,
"learning_rate": 5.2519232061624255e-06,
"loss": 0.0698,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02264336310327053,
"step": 205,
"valid_targets_mean": 6068.0,
"valid_targets_min": 467
},
{
"epoch": 5.675675675675675,
"grad_norm": 0.1623201456410524,
"learning_rate": 4.375387109955953e-06,
"loss": 0.0693,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022714542225003242,
"step": 210,
"valid_targets_mean": 6206.7,
"valid_targets_min": 1846
},
{
"epoch": 5.8108108108108105,
"grad_norm": 0.18485993941649267,
"learning_rate": 3.569837010559505e-06,
"loss": 0.0661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023270253092050552,
"step": 215,
"valid_targets_mean": 7176.0,
"valid_targets_min": 372
},
{
"epoch": 5.945945945945946,
"grad_norm": 0.18961469137543394,
"learning_rate": 2.838932696358798e-06,
"loss": 0.0695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024968810379505157,
"step": 220,
"valid_targets_mean": 6053.5,
"valid_targets_min": 427
},
{
"epoch": 6.081081081081081,
"grad_norm": 0.15015876207979317,
"learning_rate": 2.1859948237874517e-06,
"loss": 0.0681,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01862494647502899,
"step": 225,
"valid_targets_mean": 5860.2,
"valid_targets_min": 387
},
{
"epoch": 6.216216216216216,
"grad_norm": 0.15711014903589288,
"learning_rate": 1.6139898308664093e-06,
"loss": 0.0663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02179352566599846,
"step": 230,
"valid_targets_mean": 6635.5,
"valid_targets_min": 2015
},
{
"epoch": 6.351351351351352,
"grad_norm": 0.19367059367410314,
"learning_rate": 1.1255164600341816e-06,
"loss": 0.0674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022510964423418045,
"step": 235,
"valid_targets_mean": 5707.8,
"valid_targets_min": 445
},
{
"epoch": 6.486486486486487,
"grad_norm": 0.17844773460429106,
"learning_rate": 7.227939514977422e-07,
"loss": 0.0666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023919889703392982,
"step": 240,
"valid_targets_mean": 5842.8,
"valid_targets_min": 453
},
{
"epoch": 6.621621621621622,
"grad_norm": 0.16782709344204977,
"learning_rate": 4.0765196074406433e-07,
"loss": 0.0697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01854727789759636,
"step": 245,
"valid_targets_mean": 5001.1,
"valid_targets_min": 1381
},
{
"epoch": 6.756756756756757,
"grad_norm": 0.16683190263277645,
"learning_rate": 1.8152224601943435e-07,
"loss": 0.0666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02349766716361046,
"step": 250,
"valid_targets_mean": 7224.0,
"valid_targets_min": 405
},
{
"epoch": 6.891891891891892,
"grad_norm": 0.165105262824618,
"learning_rate": 4.5432163541960785e-08,
"loss": 0.0668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026979651302099228,
"step": 255,
"valid_targets_mean": 7160.0,
"valid_targets_min": 434
},
{
"epoch": 7.0,
"step": 259,
"total_flos": 1.36492518344124e+18,
"train_loss": 0.0,
"train_runtime": 1.313,
"train_samples_per_second": 18873.051,
"train_steps_per_second": 197.261
}
],
"logging_steps": 5,
"max_steps": 259,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.36492518344124e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}