Model: penfever/kimi-k2-swesmith_with_plain_docker-sandboxes-maxeps-32k Source: Original Platform
1874 lines
52 KiB
JSON
1874 lines
52 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 833,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.04219409282700422,
|
|
"grad_norm": 13.737735517308288,
|
|
"learning_rate": 1.904761904761905e-06,
|
|
"loss": 0.8138,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.42325860261917114,
|
|
"step": 5,
|
|
"valid_targets_mean": 9300.0,
|
|
"valid_targets_min": 7296
|
|
},
|
|
{
|
|
"epoch": 0.08438818565400844,
|
|
"grad_norm": 8.598223125161901,
|
|
"learning_rate": 4.2857142857142855e-06,
|
|
"loss": 0.7793,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.38333451747894287,
|
|
"step": 10,
|
|
"valid_targets_mean": 9275.5,
|
|
"valid_targets_min": 7158
|
|
},
|
|
{
|
|
"epoch": 0.12658227848101267,
|
|
"grad_norm": 3.908004842191187,
|
|
"learning_rate": 6.666666666666667e-06,
|
|
"loss": 0.6819,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.34921586513519287,
|
|
"step": 15,
|
|
"valid_targets_mean": 8389.4,
|
|
"valid_targets_min": 6233
|
|
},
|
|
{
|
|
"epoch": 0.16877637130801687,
|
|
"grad_norm": 2.3954669922452503,
|
|
"learning_rate": 9.047619047619049e-06,
|
|
"loss": 0.6067,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2741207480430603,
|
|
"step": 20,
|
|
"valid_targets_mean": 8933.9,
|
|
"valid_targets_min": 6921
|
|
},
|
|
{
|
|
"epoch": 0.2109704641350211,
|
|
"grad_norm": 1.262022302568234,
|
|
"learning_rate": 1.1428571428571429e-05,
|
|
"loss": 0.5663,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.27063384652137756,
|
|
"step": 25,
|
|
"valid_targets_mean": 8693.9,
|
|
"valid_targets_min": 5525
|
|
},
|
|
{
|
|
"epoch": 0.25316455696202533,
|
|
"grad_norm": 0.9175876804234998,
|
|
"learning_rate": 1.3809523809523811e-05,
|
|
"loss": 0.5036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.24980702996253967,
|
|
"step": 30,
|
|
"valid_targets_mean": 9026.4,
|
|
"valid_targets_min": 7000
|
|
},
|
|
{
|
|
"epoch": 0.29535864978902954,
|
|
"grad_norm": 0.7282592599899416,
|
|
"learning_rate": 1.6190476190476193e-05,
|
|
"loss": 0.4921,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22725015878677368,
|
|
"step": 35,
|
|
"valid_targets_mean": 8819.1,
|
|
"valid_targets_min": 7233
|
|
},
|
|
{
|
|
"epoch": 0.33755274261603374,
|
|
"grad_norm": 0.5629122844919103,
|
|
"learning_rate": 1.8571428571428575e-05,
|
|
"loss": 0.4657,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21749253571033478,
|
|
"step": 40,
|
|
"valid_targets_mean": 9115.0,
|
|
"valid_targets_min": 6864
|
|
},
|
|
{
|
|
"epoch": 0.379746835443038,
|
|
"grad_norm": 0.5170770216436973,
|
|
"learning_rate": 2.0952380952380954e-05,
|
|
"loss": 0.4481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2065398395061493,
|
|
"step": 45,
|
|
"valid_targets_mean": 8547.1,
|
|
"valid_targets_min": 6900
|
|
},
|
|
{
|
|
"epoch": 0.4219409282700422,
|
|
"grad_norm": 0.5034342142568142,
|
|
"learning_rate": 2.3333333333333336e-05,
|
|
"loss": 0.4218,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1998848021030426,
|
|
"step": 50,
|
|
"valid_targets_mean": 7835.2,
|
|
"valid_targets_min": 4698
|
|
},
|
|
{
|
|
"epoch": 0.4641350210970464,
|
|
"grad_norm": 0.4545350747053312,
|
|
"learning_rate": 2.5714285714285718e-05,
|
|
"loss": 0.4183,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21893973648548126,
|
|
"step": 55,
|
|
"valid_targets_mean": 9180.1,
|
|
"valid_targets_min": 7051
|
|
},
|
|
{
|
|
"epoch": 0.5063291139240507,
|
|
"grad_norm": 0.44398357660292653,
|
|
"learning_rate": 2.8095238095238096e-05,
|
|
"loss": 0.407,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19019050896167755,
|
|
"step": 60,
|
|
"valid_targets_mean": 8196.6,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 0.5485232067510548,
|
|
"grad_norm": 0.43196150110984444,
|
|
"learning_rate": 3.047619047619048e-05,
|
|
"loss": 0.3993,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18999773263931274,
|
|
"step": 65,
|
|
"valid_targets_mean": 8886.4,
|
|
"valid_targets_min": 5947
|
|
},
|
|
{
|
|
"epoch": 0.5907172995780591,
|
|
"grad_norm": 0.4356694664798871,
|
|
"learning_rate": 3.285714285714286e-05,
|
|
"loss": 0.3694,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.18235838413238525,
|
|
"step": 70,
|
|
"valid_targets_mean": 8577.5,
|
|
"valid_targets_min": 5854
|
|
},
|
|
{
|
|
"epoch": 0.6329113924050633,
|
|
"grad_norm": 0.4879795309609486,
|
|
"learning_rate": 3.523809523809524e-05,
|
|
"loss": 0.3914,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.2244984209537506,
|
|
"step": 75,
|
|
"valid_targets_mean": 9445.2,
|
|
"valid_targets_min": 7589
|
|
},
|
|
{
|
|
"epoch": 0.6751054852320675,
|
|
"grad_norm": 0.4591287171448197,
|
|
"learning_rate": 3.761904761904762e-05,
|
|
"loss": 0.3652,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.19888997077941895,
|
|
"step": 80,
|
|
"valid_targets_mean": 8648.0,
|
|
"valid_targets_min": 6995
|
|
},
|
|
{
|
|
"epoch": 0.7172995780590717,
|
|
"grad_norm": 0.43370135355956757,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.3363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15653540194034576,
|
|
"step": 85,
|
|
"valid_targets_mean": 7976.5,
|
|
"valid_targets_min": 5676
|
|
},
|
|
{
|
|
"epoch": 0.759493670886076,
|
|
"grad_norm": 0.43614169159655086,
|
|
"learning_rate": 3.9995601949609725e-05,
|
|
"loss": 0.3515,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16819864511489868,
|
|
"step": 90,
|
|
"valid_targets_mean": 8489.6,
|
|
"valid_targets_min": 5651
|
|
},
|
|
{
|
|
"epoch": 0.8016877637130801,
|
|
"grad_norm": 0.47270502658473995,
|
|
"learning_rate": 3.998240973272361e-05,
|
|
"loss": 0.3544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20371896028518677,
|
|
"step": 95,
|
|
"valid_targets_mean": 8980.9,
|
|
"valid_targets_min": 5911
|
|
},
|
|
{
|
|
"epoch": 0.8438818565400844,
|
|
"grad_norm": 0.44366932292888417,
|
|
"learning_rate": 3.996042915134512e-05,
|
|
"loss": 0.355,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.20148850977420807,
|
|
"step": 100,
|
|
"valid_targets_mean": 9147.2,
|
|
"valid_targets_min": 6069
|
|
},
|
|
{
|
|
"epoch": 0.8860759493670886,
|
|
"grad_norm": 0.450738888687614,
|
|
"learning_rate": 3.9929669872644716e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13800635933876038,
|
|
"step": 105,
|
|
"valid_targets_mean": 6815.9,
|
|
"valid_targets_min": 1124
|
|
},
|
|
{
|
|
"epoch": 0.9282700421940928,
|
|
"grad_norm": 0.41039396298425773,
|
|
"learning_rate": 3.989014542470815e-05,
|
|
"loss": 0.3378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16703501343727112,
|
|
"step": 110,
|
|
"valid_targets_mean": 10058.6,
|
|
"valid_targets_min": 8736
|
|
},
|
|
{
|
|
"epoch": 0.9704641350210971,
|
|
"grad_norm": 0.40658466169041685,
|
|
"learning_rate": 3.98418731905868e-05,
|
|
"loss": 0.3441,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1470247507095337,
|
|
"step": 115,
|
|
"valid_targets_mean": 8467.8,
|
|
"valid_targets_min": 4203
|
|
},
|
|
{
|
|
"epoch": 1.0084388185654007,
|
|
"grad_norm": 0.42706146525288,
|
|
"learning_rate": 3.978487440065248e-05,
|
|
"loss": 0.3478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1719146966934204,
|
|
"step": 120,
|
|
"valid_targets_mean": 9109.1,
|
|
"valid_targets_min": 6322
|
|
},
|
|
{
|
|
"epoch": 1.0506329113924051,
|
|
"grad_norm": 0.3862263984474135,
|
|
"learning_rate": 3.9719174123260214e-05,
|
|
"loss": 0.3238,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15366646647453308,
|
|
"step": 125,
|
|
"valid_targets_mean": 9161.2,
|
|
"valid_targets_min": 6619
|
|
},
|
|
{
|
|
"epoch": 1.0928270042194093,
|
|
"grad_norm": 0.46771718995267086,
|
|
"learning_rate": 3.964480125372307e-05,
|
|
"loss": 0.3114,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1375672072172165,
|
|
"step": 130,
|
|
"valid_targets_mean": 7859.0,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 1.1350210970464134,
|
|
"grad_norm": 0.43449504183040244,
|
|
"learning_rate": 3.9561788501603836e-05,
|
|
"loss": 0.3161,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12888583540916443,
|
|
"step": 135,
|
|
"valid_targets_mean": 7176.8,
|
|
"valid_targets_min": 1462
|
|
},
|
|
{
|
|
"epoch": 1.1772151898734178,
|
|
"grad_norm": 0.43422331056103003,
|
|
"learning_rate": 3.94701723763292e-05,
|
|
"loss": 0.3225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15962457656860352,
|
|
"step": 140,
|
|
"valid_targets_mean": 8854.5,
|
|
"valid_targets_min": 6056
|
|
},
|
|
{
|
|
"epoch": 1.219409282700422,
|
|
"grad_norm": 0.47515202494067976,
|
|
"learning_rate": 3.936999317113271e-05,
|
|
"loss": 0.3243,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1411084532737732,
|
|
"step": 145,
|
|
"valid_targets_mean": 8753.8,
|
|
"valid_targets_min": 5301
|
|
},
|
|
{
|
|
"epoch": 1.261603375527426,
|
|
"grad_norm": 0.40784537683056815,
|
|
"learning_rate": 3.926129494533362e-05,
|
|
"loss": 0.3125,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14677849411964417,
|
|
"step": 150,
|
|
"valid_targets_mean": 8915.6,
|
|
"valid_targets_min": 5868
|
|
},
|
|
{
|
|
"epoch": 1.3037974683544304,
|
|
"grad_norm": 0.42669566237324436,
|
|
"learning_rate": 3.914412550495937e-05,
|
|
"loss": 0.3189,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13394805788993835,
|
|
"step": 155,
|
|
"valid_targets_mean": 7279.6,
|
|
"valid_targets_min": 1145
|
|
},
|
|
{
|
|
"epoch": 1.3459915611814346,
|
|
"grad_norm": 0.41003883522018736,
|
|
"learning_rate": 3.901853638172025e-05,
|
|
"loss": 0.3179,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16570201516151428,
|
|
"step": 160,
|
|
"valid_targets_mean": 9676.4,
|
|
"valid_targets_min": 8776
|
|
},
|
|
{
|
|
"epoch": 1.3881856540084387,
|
|
"grad_norm": 0.4090145854881866,
|
|
"learning_rate": 3.888458281034551e-05,
|
|
"loss": 0.2767,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13220128417015076,
|
|
"step": 165,
|
|
"valid_targets_mean": 8463.6,
|
|
"valid_targets_min": 352
|
|
},
|
|
{
|
|
"epoch": 1.4303797468354431,
|
|
"grad_norm": 0.4158827283938909,
|
|
"learning_rate": 3.874232370429085e-05,
|
|
"loss": 0.3099,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1609145700931549,
|
|
"step": 170,
|
|
"valid_targets_mean": 9337.5,
|
|
"valid_targets_min": 8545
|
|
},
|
|
{
|
|
"epoch": 1.4725738396624473,
|
|
"grad_norm": 0.4624418763799199,
|
|
"learning_rate": 3.8591821629827946e-05,
|
|
"loss": 0.3101,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15375180542469025,
|
|
"step": 175,
|
|
"valid_targets_mean": 8644.0,
|
|
"valid_targets_min": 7177
|
|
},
|
|
{
|
|
"epoch": 1.5147679324894514,
|
|
"grad_norm": 0.44905907331860323,
|
|
"learning_rate": 3.8433142778527536e-05,
|
|
"loss": 0.3149,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13599535822868347,
|
|
"step": 180,
|
|
"valid_targets_mean": 7853.4,
|
|
"valid_targets_min": 4716
|
|
},
|
|
{
|
|
"epoch": 1.5569620253164556,
|
|
"grad_norm": 0.43202384347831446,
|
|
"learning_rate": 3.826635693814801e-05,
|
|
"loss": 0.3003,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14607158303260803,
|
|
"step": 185,
|
|
"valid_targets_mean": 7725.0,
|
|
"valid_targets_min": 5193
|
|
},
|
|
{
|
|
"epoch": 1.59915611814346,
|
|
"grad_norm": 0.4250126556019548,
|
|
"learning_rate": 3.8091537461942395e-05,
|
|
"loss": 0.3262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1532595157623291,
|
|
"step": 190,
|
|
"valid_targets_mean": 8313.8,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 1.6413502109704643,
|
|
"grad_norm": 0.4401245618708091,
|
|
"learning_rate": 3.7908761236397265e-05,
|
|
"loss": 0.2855,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15331213176250458,
|
|
"step": 195,
|
|
"valid_targets_mean": 8088.1,
|
|
"valid_targets_min": 4238
|
|
},
|
|
{
|
|
"epoch": 1.6835443037974684,
|
|
"grad_norm": 0.466081852025086,
|
|
"learning_rate": 3.771810864741761e-05,
|
|
"loss": 0.2953,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.156716451048851,
|
|
"step": 200,
|
|
"valid_targets_mean": 9015.5,
|
|
"valid_targets_min": 4203
|
|
},
|
|
{
|
|
"epoch": 1.7257383966244726,
|
|
"grad_norm": 0.44420961731157504,
|
|
"learning_rate": 3.751966354497278e-05,
|
|
"loss": 0.3126,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12232231348752975,
|
|
"step": 205,
|
|
"valid_targets_mean": 7068.8,
|
|
"valid_targets_min": 1389
|
|
},
|
|
{
|
|
"epoch": 1.7679324894514767,
|
|
"grad_norm": 0.44094509994269193,
|
|
"learning_rate": 3.7313513206218794e-05,
|
|
"loss": 0.3115,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15219950675964355,
|
|
"step": 210,
|
|
"valid_targets_mean": 6983.6,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 1.810126582278481,
|
|
"grad_norm": 0.4363001268331526,
|
|
"learning_rate": 3.709974829711344e-05,
|
|
"loss": 0.3157,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13169416785240173,
|
|
"step": 215,
|
|
"valid_targets_mean": 7780.9,
|
|
"valid_targets_min": 1318
|
|
},
|
|
{
|
|
"epoch": 1.8523206751054853,
|
|
"grad_norm": 0.46903212787997367,
|
|
"learning_rate": 3.6878462832540903e-05,
|
|
"loss": 0.3129,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1425689458847046,
|
|
"step": 220,
|
|
"valid_targets_mean": 8625.2,
|
|
"valid_targets_min": 6122
|
|
},
|
|
{
|
|
"epoch": 1.8945147679324894,
|
|
"grad_norm": 0.41310139602425183,
|
|
"learning_rate": 3.6649754134963564e-05,
|
|
"loss": 0.3011,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13354924321174622,
|
|
"step": 225,
|
|
"valid_targets_mean": 8469.6,
|
|
"valid_targets_min": 4623
|
|
},
|
|
{
|
|
"epoch": 1.9367088607594938,
|
|
"grad_norm": 0.4087947045064101,
|
|
"learning_rate": 3.64137227916191e-05,
|
|
"loss": 0.3079,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15488475561141968,
|
|
"step": 230,
|
|
"valid_targets_mean": 8825.4,
|
|
"valid_targets_min": 6185
|
|
},
|
|
{
|
|
"epoch": 1.978902953586498,
|
|
"grad_norm": 0.4223091429231445,
|
|
"learning_rate": 3.6170472610281674e-05,
|
|
"loss": 0.2883,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11964912712574005,
|
|
"step": 235,
|
|
"valid_targets_mean": 7039.6,
|
|
"valid_targets_min": 775
|
|
},
|
|
{
|
|
"epoch": 2.0168776371308015,
|
|
"grad_norm": 0.42112252103307546,
|
|
"learning_rate": 3.592011057360678e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1303289830684662,
|
|
"step": 240,
|
|
"valid_targets_mean": 7897.1,
|
|
"valid_targets_min": 782
|
|
},
|
|
{
|
|
"epoch": 2.059071729957806,
|
|
"grad_norm": 0.4276205778137949,
|
|
"learning_rate": 3.5662746792079726e-05,
|
|
"loss": 0.2875,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14901086688041687,
|
|
"step": 245,
|
|
"valid_targets_mean": 8519.6,
|
|
"valid_targets_min": 1031
|
|
},
|
|
{
|
|
"epoch": 2.1012658227848102,
|
|
"grad_norm": 0.4089545502307839,
|
|
"learning_rate": 3.53984944555885e-05,
|
|
"loss": 0.2765,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14145910739898682,
|
|
"step": 250,
|
|
"valid_targets_mean": 9206.6,
|
|
"valid_targets_min": 7423
|
|
},
|
|
{
|
|
"epoch": 2.1434599156118144,
|
|
"grad_norm": 0.495227570105931,
|
|
"learning_rate": 3.512746978364227e-05,
|
|
"loss": 0.2902,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15283076465129852,
|
|
"step": 255,
|
|
"valid_targets_mean": 8669.4,
|
|
"valid_targets_min": 5525
|
|
},
|
|
{
|
|
"epoch": 2.1856540084388185,
|
|
"grad_norm": 0.42413672320413554,
|
|
"learning_rate": 3.484979197425745e-05,
|
|
"loss": 0.287,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11562255024909973,
|
|
"step": 260,
|
|
"valid_targets_mean": 8506.1,
|
|
"valid_targets_min": 5868
|
|
},
|
|
{
|
|
"epoch": 2.2278481012658227,
|
|
"grad_norm": 0.4429223995215785,
|
|
"learning_rate": 3.456558315153382e-05,
|
|
"loss": 0.278,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11651501059532166,
|
|
"step": 265,
|
|
"valid_targets_mean": 7150.5,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 2.270042194092827,
|
|
"grad_norm": 0.3916309614586102,
|
|
"learning_rate": 3.427496831194378e-05,
|
|
"loss": 0.2854,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11857372522354126,
|
|
"step": 270,
|
|
"valid_targets_mean": 9030.6,
|
|
"valid_targets_min": 7393
|
|
},
|
|
{
|
|
"epoch": 2.3122362869198314,
|
|
"grad_norm": 0.4408587328354608,
|
|
"learning_rate": 3.3978075269358175e-05,
|
|
"loss": 0.2797,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14032971858978271,
|
|
"step": 275,
|
|
"valid_targets_mean": 8928.5,
|
|
"valid_targets_min": 6624
|
|
},
|
|
{
|
|
"epoch": 2.3544303797468356,
|
|
"grad_norm": 0.5255984072468615,
|
|
"learning_rate": 3.3675034598833196e-05,
|
|
"loss": 0.286,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1699741780757904,
|
|
"step": 280,
|
|
"valid_targets_mean": 9138.8,
|
|
"valid_targets_min": 7798
|
|
},
|
|
{
|
|
"epoch": 2.3966244725738397,
|
|
"grad_norm": 0.46154900583042857,
|
|
"learning_rate": 3.3365979579182774e-05,
|
|
"loss": 0.2841,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1370779126882553,
|
|
"step": 285,
|
|
"valid_targets_mean": 7857.2,
|
|
"valid_targets_min": 5341
|
|
},
|
|
{
|
|
"epoch": 2.438818565400844,
|
|
"grad_norm": 0.410815728442414,
|
|
"learning_rate": 3.3051046134361874e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13771235942840576,
|
|
"step": 290,
|
|
"valid_targets_mean": 8922.4,
|
|
"valid_targets_min": 5929
|
|
},
|
|
{
|
|
"epoch": 2.481012658227848,
|
|
"grad_norm": 0.42579984902443774,
|
|
"learning_rate": 3.27303727736865e-05,
|
|
"loss": 0.2703,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.150842547416687,
|
|
"step": 295,
|
|
"valid_targets_mean": 9481.6,
|
|
"valid_targets_min": 8016
|
|
},
|
|
{
|
|
"epoch": 2.523206751054852,
|
|
"grad_norm": 0.45157363816682605,
|
|
"learning_rate": 3.2404100530916555e-05,
|
|
"loss": 0.2754,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12719795107841492,
|
|
"step": 300,
|
|
"valid_targets_mean": 8190.1,
|
|
"valid_targets_min": 6069
|
|
},
|
|
{
|
|
"epoch": 2.5654008438818563,
|
|
"grad_norm": 0.429470249992375,
|
|
"learning_rate": 3.20723729022285e-05,
|
|
"loss": 0.2699,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10490024089813232,
|
|
"step": 305,
|
|
"valid_targets_mean": 7513.5,
|
|
"valid_targets_min": 1012
|
|
},
|
|
{
|
|
"epoch": 2.607594936708861,
|
|
"grad_norm": 0.44240780660157275,
|
|
"learning_rate": 3.173533578310503e-05,
|
|
"loss": 0.2846,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15472307801246643,
|
|
"step": 310,
|
|
"valid_targets_mean": 9026.9,
|
|
"valid_targets_min": 6796
|
|
},
|
|
{
|
|
"epoch": 2.649789029535865,
|
|
"grad_norm": 0.46171787256178776,
|
|
"learning_rate": 3.139313740416945e-05,
|
|
"loss": 0.2734,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14282463490962982,
|
|
"step": 315,
|
|
"valid_targets_mean": 8840.5,
|
|
"valid_targets_min": 5948
|
|
},
|
|
{
|
|
"epoch": 2.691983122362869,
|
|
"grad_norm": 0.5596242010714433,
|
|
"learning_rate": 3.104592826599319e-05,
|
|
"loss": 0.2682,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13433107733726501,
|
|
"step": 320,
|
|
"valid_targets_mean": 8862.6,
|
|
"valid_targets_min": 7327
|
|
},
|
|
{
|
|
"epoch": 2.7341772151898733,
|
|
"grad_norm": 0.41372708901928346,
|
|
"learning_rate": 3.06938610729048e-05,
|
|
"loss": 0.2744,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13239914178848267,
|
|
"step": 325,
|
|
"valid_targets_mean": 8498.8,
|
|
"valid_targets_min": 5562
|
|
},
|
|
{
|
|
"epoch": 2.7763713080168775,
|
|
"grad_norm": 1.6545728711184768,
|
|
"learning_rate": 3.0337090665829883e-05,
|
|
"loss": 0.2861,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1271859109401703,
|
|
"step": 330,
|
|
"valid_targets_mean": 8136.5,
|
|
"valid_targets_min": 5643
|
|
},
|
|
{
|
|
"epoch": 2.818565400843882,
|
|
"grad_norm": 0.4317369059905672,
|
|
"learning_rate": 2.9975773954191246e-05,
|
|
"loss": 0.2847,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16068033874034882,
|
|
"step": 335,
|
|
"valid_targets_mean": 9313.6,
|
|
"valid_targets_min": 8083
|
|
},
|
|
{
|
|
"epoch": 2.8607594936708862,
|
|
"grad_norm": 0.4361200075559799,
|
|
"learning_rate": 2.9610069846899355e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12818226218223572,
|
|
"step": 340,
|
|
"valid_targets_mean": 8236.2,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 2.9029535864978904,
|
|
"grad_norm": 0.41932505340642173,
|
|
"learning_rate": 2.9240139182463384e-05,
|
|
"loss": 0.2798,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13603907823562622,
|
|
"step": 345,
|
|
"valid_targets_mean": 8153.4,
|
|
"valid_targets_min": 1722
|
|
},
|
|
{
|
|
"epoch": 2.9451476793248945,
|
|
"grad_norm": 0.4285742105739042,
|
|
"learning_rate": 2.8866144658253642e-05,
|
|
"loss": 0.2826,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13186733424663544,
|
|
"step": 350,
|
|
"valid_targets_mean": 7989.5,
|
|
"valid_targets_min": 4996
|
|
},
|
|
{
|
|
"epoch": 2.9873417721518987,
|
|
"grad_norm": 0.46406691637390524,
|
|
"learning_rate": 2.8488250758946453e-05,
|
|
"loss": 0.2709,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14630965888500214,
|
|
"step": 355,
|
|
"valid_targets_mean": 8137.5,
|
|
"valid_targets_min": 3982
|
|
},
|
|
{
|
|
"epoch": 3.0253164556962027,
|
|
"grad_norm": 0.43995204460384685,
|
|
"learning_rate": 2.8106623684182944e-05,
|
|
"loss": 0.2549,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13689684867858887,
|
|
"step": 360,
|
|
"valid_targets_mean": 8913.4,
|
|
"valid_targets_min": 7312
|
|
},
|
|
{
|
|
"epoch": 3.067510548523207,
|
|
"grad_norm": 0.45122017845034534,
|
|
"learning_rate": 2.7721431275473634e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13735206425189972,
|
|
"step": 365,
|
|
"valid_targets_mean": 8872.2,
|
|
"valid_targets_min": 4723
|
|
},
|
|
{
|
|
"epoch": 3.109704641350211,
|
|
"grad_norm": 0.41848489063381833,
|
|
"learning_rate": 2.733284294238086e-05,
|
|
"loss": 0.2606,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10563844442367554,
|
|
"step": 370,
|
|
"valid_targets_mean": 8790.6,
|
|
"valid_targets_min": 6799
|
|
},
|
|
{
|
|
"epoch": 3.151898734177215,
|
|
"grad_norm": 0.48612738303501324,
|
|
"learning_rate": 2.694102958801163e-05,
|
|
"loss": 0.261,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13847270607948303,
|
|
"step": 375,
|
|
"valid_targets_mean": 7784.8,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 3.1940928270042193,
|
|
"grad_norm": 0.43439045015611505,
|
|
"learning_rate": 2.654616353385354e-05,
|
|
"loss": 0.2545,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11931806802749634,
|
|
"step": 380,
|
|
"valid_targets_mean": 8220.5,
|
|
"valid_targets_min": 6466
|
|
},
|
|
{
|
|
"epoch": 3.2362869198312234,
|
|
"grad_norm": 0.4672629305905832,
|
|
"learning_rate": 2.6148418443986967e-05,
|
|
"loss": 0.2573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12390561401844025,
|
|
"step": 385,
|
|
"valid_targets_mean": 8790.5,
|
|
"valid_targets_min": 7425
|
|
},
|
|
{
|
|
"epoch": 3.278481012658228,
|
|
"grad_norm": 0.4814651192611666,
|
|
"learning_rate": 2.5747969248706675e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15146447718143463,
|
|
"step": 390,
|
|
"valid_targets_mean": 9032.1,
|
|
"valid_targets_min": 5188
|
|
},
|
|
{
|
|
"epoch": 3.320675105485232,
|
|
"grad_norm": 0.4374830944824983,
|
|
"learning_rate": 2.5344992067586623e-05,
|
|
"loss": 0.2647,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13405266404151917,
|
|
"step": 395,
|
|
"valid_targets_mean": 8223.2,
|
|
"valid_targets_min": 3452
|
|
},
|
|
{
|
|
"epoch": 3.3628691983122363,
|
|
"grad_norm": 0.4732142324779574,
|
|
"learning_rate": 2.4939664132021685e-05,
|
|
"loss": 0.2558,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10236911475658417,
|
|
"step": 400,
|
|
"valid_targets_mean": 7064.0,
|
|
"valid_targets_min": 1079
|
|
},
|
|
{
|
|
"epoch": 3.4050632911392404,
|
|
"grad_norm": 0.43954374313846106,
|
|
"learning_rate": 2.4532163707280372e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12227586656808853,
|
|
"step": 405,
|
|
"valid_targets_mean": 8465.5,
|
|
"valid_targets_min": 6739
|
|
},
|
|
{
|
|
"epoch": 3.4472573839662446,
|
|
"grad_norm": 0.42292011324295486,
|
|
"learning_rate": 2.4122670014102905e-05,
|
|
"loss": 0.262,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11511904001235962,
|
|
"step": 410,
|
|
"valid_targets_mean": 9274.5,
|
|
"valid_targets_min": 6734
|
|
},
|
|
{
|
|
"epoch": 3.489451476793249,
|
|
"grad_norm": 0.4344835895735963,
|
|
"learning_rate": 2.371136314987898e-05,
|
|
"loss": 0.2822,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14791841804981232,
|
|
"step": 415,
|
|
"valid_targets_mean": 8766.1,
|
|
"valid_targets_min": 6028
|
|
},
|
|
{
|
|
"epoch": 3.5316455696202533,
|
|
"grad_norm": 0.4261397076907197,
|
|
"learning_rate": 2.329842400944008e-05,
|
|
"loss": 0.2528,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10815391689538956,
|
|
"step": 420,
|
|
"valid_targets_mean": 8315.0,
|
|
"valid_targets_min": 7097
|
|
},
|
|
{
|
|
"epoch": 3.5738396624472575,
|
|
"grad_norm": 0.5622442435750628,
|
|
"learning_rate": 2.2884034205500977e-05,
|
|
"loss": 0.2575,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13450974225997925,
|
|
"step": 425,
|
|
"valid_targets_mean": 8429.6,
|
|
"valid_targets_min": 6223
|
|
},
|
|
{
|
|
"epoch": 3.6160337552742616,
|
|
"grad_norm": 0.45056620552671345,
|
|
"learning_rate": 2.246837598878557e-05,
|
|
"loss": 0.2728,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12603911757469177,
|
|
"step": 430,
|
|
"valid_targets_mean": 8927.6,
|
|
"valid_targets_min": 5603
|
|
},
|
|
{
|
|
"epoch": 3.6582278481012658,
|
|
"grad_norm": 0.44591198215906896,
|
|
"learning_rate": 2.2051632167872072e-05,
|
|
"loss": 0.2645,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16440054774284363,
|
|
"step": 435,
|
|
"valid_targets_mean": 9716.9,
|
|
"valid_targets_min": 8934
|
|
},
|
|
{
|
|
"epoch": 3.70042194092827,
|
|
"grad_norm": 0.4244520408699371,
|
|
"learning_rate": 2.1633986028792915e-05,
|
|
"loss": 0.2468,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12980708479881287,
|
|
"step": 440,
|
|
"valid_targets_mean": 9191.9,
|
|
"valid_targets_min": 7691
|
|
},
|
|
{
|
|
"epoch": 3.742616033755274,
|
|
"grad_norm": 0.4400154252728221,
|
|
"learning_rate": 2.1215621254424592e-05,
|
|
"loss": 0.2621,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14869439601898193,
|
|
"step": 445,
|
|
"valid_targets_mean": 9304.6,
|
|
"valid_targets_min": 8242
|
|
},
|
|
{
|
|
"epoch": 3.7848101265822782,
|
|
"grad_norm": 0.4312069617379547,
|
|
"learning_rate": 2.0796721843703028e-05,
|
|
"loss": 0.2667,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14449357986450195,
|
|
"step": 450,
|
|
"valid_targets_mean": 8888.6,
|
|
"valid_targets_min": 4545
|
|
},
|
|
{
|
|
"epoch": 3.827004219409283,
|
|
"grad_norm": 0.6226769603382436,
|
|
"learning_rate": 2.0377472030699895e-05,
|
|
"loss": 0.2561,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13251638412475586,
|
|
"step": 455,
|
|
"valid_targets_mean": 9287.4,
|
|
"valid_targets_min": 7232
|
|
},
|
|
{
|
|
"epoch": 3.869198312236287,
|
|
"grad_norm": 0.41779119755506317,
|
|
"learning_rate": 1.995805620359557e-05,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11737249791622162,
|
|
"step": 460,
|
|
"valid_targets_mean": 8372.5,
|
|
"valid_targets_min": 6429
|
|
},
|
|
{
|
|
"epoch": 3.911392405063291,
|
|
"grad_norm": 0.42919575535179477,
|
|
"learning_rate": 1.9538658823584258e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12399604916572571,
|
|
"step": 465,
|
|
"valid_targets_mean": 8499.9,
|
|
"valid_targets_min": 7146
|
|
},
|
|
{
|
|
"epoch": 3.9535864978902953,
|
|
"grad_norm": 0.4282733272450913,
|
|
"learning_rate": 1.9119464343747048e-05,
|
|
"loss": 0.2576,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12035520374774933,
|
|
"step": 470,
|
|
"valid_targets_mean": 9111.9,
|
|
"valid_targets_min": 7021
|
|
},
|
|
{
|
|
"epoch": 3.9957805907173,
|
|
"grad_norm": 0.4688557446235436,
|
|
"learning_rate": 1.8700657127928495e-05,
|
|
"loss": 0.2579,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14246785640716553,
|
|
"step": 475,
|
|
"valid_targets_mean": 8446.9,
|
|
"valid_targets_min": 3548
|
|
},
|
|
{
|
|
"epoch": 4.033755274261603,
|
|
"grad_norm": 0.4179516240442328,
|
|
"learning_rate": 1.8282421369652514e-05,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12743636965751648,
|
|
"step": 480,
|
|
"valid_targets_mean": 8593.9,
|
|
"valid_targets_min": 6807
|
|
},
|
|
{
|
|
"epoch": 4.075949367088608,
|
|
"grad_norm": 0.4159181042353588,
|
|
"learning_rate": 1.786494101111308e-05,
|
|
"loss": 0.2435,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12010753154754639,
|
|
"step": 485,
|
|
"valid_targets_mean": 9153.5,
|
|
"valid_targets_min": 7211
|
|
},
|
|
{
|
|
"epoch": 4.118143459915612,
|
|
"grad_norm": 0.43857477399241507,
|
|
"learning_rate": 1.7448399662275577e-05,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11870156228542328,
|
|
"step": 490,
|
|
"valid_targets_mean": 8510.8,
|
|
"valid_targets_min": 5260
|
|
},
|
|
{
|
|
"epoch": 4.160337552742616,
|
|
"grad_norm": 0.7799080736665794,
|
|
"learning_rate": 1.70329805201242e-05,
|
|
"loss": 0.2587,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13719968497753143,
|
|
"step": 495,
|
|
"valid_targets_mean": 8487.0,
|
|
"valid_targets_min": 5677
|
|
},
|
|
{
|
|
"epoch": 4.2025316455696204,
|
|
"grad_norm": 0.4292260280318674,
|
|
"learning_rate": 1.661886628809096e-05,
|
|
"loss": 0.2417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13319876790046692,
|
|
"step": 500,
|
|
"valid_targets_mean": 9258.0,
|
|
"valid_targets_min": 7850
|
|
},
|
|
{
|
|
"epoch": 4.244725738396625,
|
|
"grad_norm": 0.47075531459044107,
|
|
"learning_rate": 1.620623909570185e-05,
|
|
"loss": 0.2521,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12588399648666382,
|
|
"step": 505,
|
|
"valid_targets_mean": 7839.9,
|
|
"valid_targets_min": 709
|
|
},
|
|
{
|
|
"epoch": 4.286919831223629,
|
|
"grad_norm": 0.4443285639795416,
|
|
"learning_rate": 1.5795280418475313e-05,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12543919682502747,
|
|
"step": 510,
|
|
"valid_targets_mean": 9025.4,
|
|
"valid_targets_min": 6790
|
|
},
|
|
{
|
|
"epoch": 4.329113924050633,
|
|
"grad_norm": 0.45446326155202266,
|
|
"learning_rate": 1.5386170998108432e-05,
|
|
"loss": 0.2508,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12875355780124664,
|
|
"step": 515,
|
|
"valid_targets_mean": 8992.2,
|
|
"valid_targets_min": 7538
|
|
},
|
|
{
|
|
"epoch": 4.371308016877637,
|
|
"grad_norm": 0.4188354388664049,
|
|
"learning_rate": 1.4979090762985793e-05,
|
|
"loss": 0.2393,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1216769590973854,
|
|
"step": 520,
|
|
"valid_targets_mean": 9567.9,
|
|
"valid_targets_min": 8414
|
|
},
|
|
{
|
|
"epoch": 4.413502109704641,
|
|
"grad_norm": 0.5144343057216146,
|
|
"learning_rate": 1.4574218749046097e-05,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10991360992193222,
|
|
"step": 525,
|
|
"valid_targets_mean": 8216.4,
|
|
"valid_targets_min": 5783
|
|
},
|
|
{
|
|
"epoch": 4.455696202531645,
|
|
"grad_norm": 0.45077488627425316,
|
|
"learning_rate": 1.4171733021041236e-05,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12308443337678909,
|
|
"step": 530,
|
|
"valid_targets_mean": 8366.5,
|
|
"valid_targets_min": 5486
|
|
},
|
|
{
|
|
"epoch": 4.4978902953586495,
|
|
"grad_norm": 0.4438440275448954,
|
|
"learning_rate": 1.3771810594222522e-05,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1209210604429245,
|
|
"step": 535,
|
|
"valid_targets_mean": 8559.5,
|
|
"valid_targets_min": 6739
|
|
},
|
|
{
|
|
"epoch": 4.540084388185654,
|
|
"grad_norm": 0.4138439523530891,
|
|
"learning_rate": 1.3374627356488486e-05,
|
|
"loss": 0.2346,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13088755309581757,
|
|
"step": 540,
|
|
"valid_targets_mean": 9257.6,
|
|
"valid_targets_min": 5925
|
|
},
|
|
{
|
|
"epoch": 4.582278481012658,
|
|
"grad_norm": 0.4348705477932507,
|
|
"learning_rate": 1.2980357991028504e-05,
|
|
"loss": 0.2628,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12444625794887543,
|
|
"step": 545,
|
|
"valid_targets_mean": 9086.5,
|
|
"valid_targets_min": 7240
|
|
},
|
|
{
|
|
"epoch": 4.624472573839663,
|
|
"grad_norm": 0.4514383858436243,
|
|
"learning_rate": 1.2589175899496241e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14704394340515137,
|
|
"step": 550,
|
|
"valid_targets_mean": 9210.2,
|
|
"valid_targets_min": 7000
|
|
},
|
|
{
|
|
"epoch": 4.666666666666667,
|
|
"grad_norm": 0.43305229013411795,
|
|
"learning_rate": 1.2201253125746733e-05,
|
|
"loss": 0.2471,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1392921805381775,
|
|
"step": 555,
|
|
"valid_targets_mean": 9039.9,
|
|
"valid_targets_min": 7246
|
|
},
|
|
{
|
|
"epoch": 4.708860759493671,
|
|
"grad_norm": 0.4135427416829583,
|
|
"learning_rate": 1.1816760280170615e-05,
|
|
"loss": 0.2474,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10349678993225098,
|
|
"step": 560,
|
|
"valid_targets_mean": 8758.5,
|
|
"valid_targets_min": 6734
|
|
},
|
|
{
|
|
"epoch": 4.751054852320675,
|
|
"grad_norm": 0.4134170552023414,
|
|
"learning_rate": 1.1435866464658856e-05,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0989164412021637,
|
|
"step": 565,
|
|
"valid_targets_mean": 7774.6,
|
|
"valid_targets_min": 1033
|
|
},
|
|
{
|
|
"epoch": 4.793248945147679,
|
|
"grad_norm": 0.4202770280254775,
|
|
"learning_rate": 1.1058739198230856e-05,
|
|
"loss": 0.2307,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10661663115024567,
|
|
"step": 570,
|
|
"valid_targets_mean": 8591.6,
|
|
"valid_targets_min": 5296
|
|
},
|
|
{
|
|
"epoch": 4.8354430379746836,
|
|
"grad_norm": 0.42451909244520697,
|
|
"learning_rate": 1.0685544343358729e-05,
|
|
"loss": 0.2498,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12151402235031128,
|
|
"step": 575,
|
|
"valid_targets_mean": 9392.6,
|
|
"valid_targets_min": 6279
|
|
},
|
|
{
|
|
"epoch": 4.877637130801688,
|
|
"grad_norm": 0.439789899437483,
|
|
"learning_rate": 1.0316446033020198e-05,
|
|
"loss": 0.2379,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09513577073812485,
|
|
"step": 580,
|
|
"valid_targets_mean": 8077.6,
|
|
"valid_targets_min": 4893
|
|
},
|
|
{
|
|
"epoch": 4.919831223628692,
|
|
"grad_norm": 0.4316282443408433,
|
|
"learning_rate": 9.951606598512042e-06,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13961660861968994,
|
|
"step": 585,
|
|
"valid_targets_mean": 9551.0,
|
|
"valid_targets_min": 8353
|
|
},
|
|
{
|
|
"epoch": 4.962025316455696,
|
|
"grad_norm": 0.44460163153797444,
|
|
"learning_rate": 9.591186498056004e-06,
|
|
"loss": 0.2328,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11063335835933685,
|
|
"step": 590,
|
|
"valid_targets_mean": 8310.2,
|
|
"valid_targets_min": 5642
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.6232611185111987,
|
|
"learning_rate": 9.235344246228415e-06,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.21027398109436035,
|
|
"step": 595,
|
|
"valid_targets_mean": 9007.9,
|
|
"valid_targets_min": 7843
|
|
},
|
|
{
|
|
"epoch": 5.042194092827004,
|
|
"grad_norm": 0.45373400536723996,
|
|
"learning_rate": 8.884236344244734e-06,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11295656859874725,
|
|
"step": 600,
|
|
"valid_targets_mean": 8223.9,
|
|
"valid_targets_min": 6369
|
|
},
|
|
{
|
|
"epoch": 5.084388185654008,
|
|
"grad_norm": 0.44188846780421875,
|
|
"learning_rate": 8.538017211129501e-06,
|
|
"loss": 0.2267,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09402458369731903,
|
|
"step": 605,
|
|
"valid_targets_mean": 8217.8,
|
|
"valid_targets_min": 5192
|
|
},
|
|
{
|
|
"epoch": 5.1265822784810124,
|
|
"grad_norm": 0.4739239566419668,
|
|
"learning_rate": 8.196839115802071e-06,
|
|
"loss": 0.2416,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1246982291340828,
|
|
"step": 610,
|
|
"valid_targets_mean": 9335.2,
|
|
"valid_targets_min": 7233
|
|
},
|
|
{
|
|
"epoch": 5.168776371308017,
|
|
"grad_norm": 0.41388443568870104,
|
|
"learning_rate": 7.860852110107952e-06,
|
|
"loss": 0.236,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1162976399064064,
|
|
"step": 615,
|
|
"valid_targets_mean": 8525.2,
|
|
"valid_targets_min": 5947
|
|
},
|
|
{
|
|
"epoch": 5.210970464135021,
|
|
"grad_norm": 0.4584431908697567,
|
|
"learning_rate": 7.530203962825331e-06,
|
|
"loss": 0.2438,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11082412302494049,
|
|
"step": 620,
|
|
"valid_targets_mean": 8312.5,
|
|
"valid_targets_min": 3548
|
|
},
|
|
{
|
|
"epoch": 5.253164556962025,
|
|
"grad_norm": 0.5075457556667611,
|
|
"learning_rate": 7.205040094675502e-06,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1264800876379013,
|
|
"step": 625,
|
|
"valid_targets_mean": 9783.8,
|
|
"valid_targets_min": 8094
|
|
},
|
|
{
|
|
"epoch": 5.29535864978903,
|
|
"grad_norm": 0.43671359308707003,
|
|
"learning_rate": 6.885503514366203e-06,
|
|
"loss": 0.239,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1132010817527771,
|
|
"step": 630,
|
|
"valid_targets_mean": 8917.6,
|
|
"valid_targets_min": 6726
|
|
},
|
|
{
|
|
"epoch": 5.337552742616034,
|
|
"grad_norm": 0.4375519850374428,
|
|
"learning_rate": 6.571734755695584e-06,
|
|
"loss": 0.23,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12824538350105286,
|
|
"step": 635,
|
|
"valid_targets_mean": 8502.2,
|
|
"valid_targets_min": 1497
|
|
},
|
|
{
|
|
"epoch": 5.379746835443038,
|
|
"grad_norm": 0.4243347246282382,
|
|
"learning_rate": 6.2638718157448285e-06,
|
|
"loss": 0.2342,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11869437992572784,
|
|
"step": 640,
|
|
"valid_targets_mean": 9188.5,
|
|
"valid_targets_min": 7034
|
|
},
|
|
{
|
|
"epoch": 5.421940928270042,
|
|
"grad_norm": 0.46745881796696226,
|
|
"learning_rate": 5.962050094186236e-06,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11892999708652496,
|
|
"step": 645,
|
|
"valid_targets_mean": 8592.6,
|
|
"valid_targets_min": 6624
|
|
},
|
|
{
|
|
"epoch": 5.4641350210970465,
|
|
"grad_norm": 0.469810046983012,
|
|
"learning_rate": 5.666402333733843e-06,
|
|
"loss": 0.2372,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12667159736156464,
|
|
"step": 650,
|
|
"valid_targets_mean": 7721.2,
|
|
"valid_targets_min": 5301
|
|
},
|
|
{
|
|
"epoch": 5.506329113924051,
|
|
"grad_norm": 0.440394400681658,
|
|
"learning_rate": 5.377058561762474e-06,
|
|
"loss": 0.2344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09980838745832443,
|
|
"step": 655,
|
|
"valid_targets_mean": 7530.0,
|
|
"valid_targets_min": 4441
|
|
},
|
|
{
|
|
"epoch": 5.548523206751055,
|
|
"grad_norm": 0.46129642001131443,
|
|
"learning_rate": 5.094146033121057e-06,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10793110728263855,
|
|
"step": 660,
|
|
"valid_targets_mean": 8216.8,
|
|
"valid_targets_min": 4893
|
|
},
|
|
{
|
|
"epoch": 5.590717299578059,
|
|
"grad_norm": 0.48121058406225553,
|
|
"learning_rate": 4.817789174165281e-06,
|
|
"loss": 0.2572,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11774282157421112,
|
|
"step": 665,
|
|
"valid_targets_mean": 9377.8,
|
|
"valid_targets_min": 7466
|
|
},
|
|
{
|
|
"epoch": 5.632911392405063,
|
|
"grad_norm": 0.47185562628992206,
|
|
"learning_rate": 4.5481095280342925e-06,
|
|
"loss": 0.2359,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10965608060359955,
|
|
"step": 670,
|
|
"valid_targets_mean": 7637.9,
|
|
"valid_targets_min": 1141
|
|
},
|
|
{
|
|
"epoch": 5.675105485232067,
|
|
"grad_norm": 0.4605629407236577,
|
|
"learning_rate": 4.285225701195383e-06,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14336133003234863,
|
|
"step": 675,
|
|
"valid_targets_mean": 8773.4,
|
|
"valid_targets_min": 5065
|
|
},
|
|
{
|
|
"epoch": 5.717299578059071,
|
|
"grad_norm": 0.39880864828224805,
|
|
"learning_rate": 4.029253311280281e-06,
|
|
"loss": 0.2439,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11196043342351913,
|
|
"step": 680,
|
|
"valid_targets_mean": 9331.1,
|
|
"valid_targets_min": 7073
|
|
},
|
|
{
|
|
"epoch": 5.759493670886076,
|
|
"grad_norm": 0.435020554643124,
|
|
"learning_rate": 3.7803049362359103e-06,
|
|
"loss": 0.2333,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11478637158870697,
|
|
"step": 685,
|
|
"valid_targets_mean": 8947.9,
|
|
"valid_targets_min": 8357
|
|
},
|
|
{
|
|
"epoch": 5.80168776371308,
|
|
"grad_norm": 0.4269181943885422,
|
|
"learning_rate": 3.5384900648120814e-06,
|
|
"loss": 0.2424,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1145181730389595,
|
|
"step": 690,
|
|
"valid_targets_mean": 9141.5,
|
|
"valid_targets_min": 6753
|
|
},
|
|
{
|
|
"epoch": 5.843881856540085,
|
|
"grad_norm": 0.40001789612061567,
|
|
"learning_rate": 3.3039150484077555e-06,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10709261149168015,
|
|
"step": 695,
|
|
"valid_targets_mean": 8723.4,
|
|
"valid_targets_min": 5732
|
|
},
|
|
{
|
|
"epoch": 5.886075949367089,
|
|
"grad_norm": 0.4272824200506153,
|
|
"learning_rate": 3.0766830542971826e-06,
|
|
"loss": 0.2316,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08846732974052429,
|
|
"step": 700,
|
|
"valid_targets_mean": 7904.5,
|
|
"valid_targets_min": 4238
|
|
},
|
|
{
|
|
"epoch": 5.928270042194093,
|
|
"grad_norm": 0.4240311265649667,
|
|
"learning_rate": 2.856894020256389e-06,
|
|
"loss": 0.2315,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10147963464260101,
|
|
"step": 705,
|
|
"valid_targets_mean": 7797.0,
|
|
"valid_targets_min": 773
|
|
},
|
|
{
|
|
"epoch": 5.970464135021097,
|
|
"grad_norm": 0.44341039833186224,
|
|
"learning_rate": 2.6446446106100855e-06,
|
|
"loss": 0.2358,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11635808646678925,
|
|
"step": 710,
|
|
"valid_targets_mean": 8042.9,
|
|
"valid_targets_min": 4819
|
|
},
|
|
{
|
|
"epoch": 6.008438818565401,
|
|
"grad_norm": 0.43454237089026004,
|
|
"learning_rate": 2.4400281737181518e-06,
|
|
"loss": 0.2485,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12369026243686676,
|
|
"step": 715,
|
|
"valid_targets_mean": 7799.2,
|
|
"valid_targets_min": 1552
|
|
},
|
|
{
|
|
"epoch": 6.050632911392405,
|
|
"grad_norm": 0.4086719126374126,
|
|
"learning_rate": 2.2431347009206107e-06,
|
|
"loss": 0.2399,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11836208403110504,
|
|
"step": 720,
|
|
"valid_targets_mean": 8866.1,
|
|
"valid_targets_min": 5937
|
|
},
|
|
{
|
|
"epoch": 6.0928270042194095,
|
|
"grad_norm": 0.47554127803953666,
|
|
"learning_rate": 2.0540507869589365e-06,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12429390847682953,
|
|
"step": 725,
|
|
"valid_targets_mean": 7500.9,
|
|
"valid_targets_min": 1372
|
|
},
|
|
{
|
|
"epoch": 6.135021097046414,
|
|
"grad_norm": 0.4138302974266342,
|
|
"learning_rate": 1.872859591891305e-06,
|
|
"loss": 0.235,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10978598892688751,
|
|
"step": 730,
|
|
"valid_targets_mean": 9062.0,
|
|
"valid_targets_min": 5651
|
|
},
|
|
{
|
|
"epoch": 6.177215189873418,
|
|
"grad_norm": 0.477745903678315,
|
|
"learning_rate": 1.6996408045183255e-06,
|
|
"loss": 0.231,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0997207909822464,
|
|
"step": 735,
|
|
"valid_targets_mean": 8074.8,
|
|
"valid_targets_min": 4794
|
|
},
|
|
{
|
|
"epoch": 6.219409282700422,
|
|
"grad_norm": 0.39025085721052233,
|
|
"learning_rate": 1.5344706073355386e-06,
|
|
"loss": 0.2225,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1053914874792099,
|
|
"step": 740,
|
|
"valid_targets_mean": 9646.5,
|
|
"valid_targets_min": 8467
|
|
},
|
|
{
|
|
"epoch": 6.261603375527426,
|
|
"grad_norm": 0.46907873784680004,
|
|
"learning_rate": 1.377421643027963e-06,
|
|
"loss": 0.2403,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1020062193274498,
|
|
"step": 745,
|
|
"valid_targets_mean": 8838.5,
|
|
"valid_targets_min": 5929
|
|
},
|
|
{
|
|
"epoch": 6.30379746835443,
|
|
"grad_norm": 0.4255302670132069,
|
|
"learning_rate": 1.2285629825214817e-06,
|
|
"loss": 0.242,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11597343534231186,
|
|
"step": 750,
|
|
"valid_targets_mean": 8416.2,
|
|
"valid_targets_min": 6747
|
|
},
|
|
{
|
|
"epoch": 6.345991561181434,
|
|
"grad_norm": 0.4209806375747758,
|
|
"learning_rate": 1.0879600946050783e-06,
|
|
"loss": 0.2198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10497762262821198,
|
|
"step": 755,
|
|
"valid_targets_mean": 8873.2,
|
|
"valid_targets_min": 6692
|
|
},
|
|
{
|
|
"epoch": 6.3881856540084385,
|
|
"grad_norm": 0.3994620526352628,
|
|
"learning_rate": 9.556748171373663e-07,
|
|
"loss": 0.2363,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11500047892332077,
|
|
"step": 760,
|
|
"valid_targets_mean": 9397.5,
|
|
"valid_targets_min": 6864
|
|
},
|
|
{
|
|
"epoch": 6.430379746835443,
|
|
"grad_norm": 0.48476265690454917,
|
|
"learning_rate": 8.317653298499651e-07,
|
|
"loss": 0.2378,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11502040922641754,
|
|
"step": 765,
|
|
"valid_targets_mean": 8142.6,
|
|
"valid_targets_min": 1255
|
|
},
|
|
{
|
|
"epoch": 6.472573839662447,
|
|
"grad_norm": 0.4559778814783951,
|
|
"learning_rate": 7.162861287597666e-07,
|
|
"loss": 0.2408,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15468385815620422,
|
|
"step": 770,
|
|
"valid_targets_mean": 9261.8,
|
|
"valid_targets_min": 6256
|
|
},
|
|
{
|
|
"epoch": 6.514767932489452,
|
|
"grad_norm": 0.40532634311815335,
|
|
"learning_rate": 6.092880022013115e-07,
|
|
"loss": 0.2402,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12497968971729279,
|
|
"step": 775,
|
|
"valid_targets_mean": 8960.5,
|
|
"valid_targets_min": 5005
|
|
},
|
|
{
|
|
"epoch": 6.556962025316456,
|
|
"grad_norm": 0.43147417534943683,
|
|
"learning_rate": 5.108180084898284e-07,
|
|
"loss": 0.2321,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10713645815849304,
|
|
"step": 780,
|
|
"valid_targets_mean": 8562.0,
|
|
"valid_targets_min": 5754
|
|
},
|
|
{
|
|
"epoch": 6.59915611814346,
|
|
"grad_norm": 0.4178467879199177,
|
|
"learning_rate": 4.209194552247442e-07,
|
|
"loss": 0.2387,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11786434799432755,
|
|
"step": 785,
|
|
"valid_targets_mean": 8937.8,
|
|
"valid_targets_min": 6908
|
|
},
|
|
{
|
|
"epoch": 6.641350210970464,
|
|
"grad_norm": 0.4296586265719399,
|
|
"learning_rate": 3.3963188024278783e-07,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12450215220451355,
|
|
"step": 790,
|
|
"valid_targets_mean": 8674.1,
|
|
"valid_targets_min": 5603
|
|
},
|
|
{
|
|
"epoch": 6.6835443037974684,
|
|
"grad_norm": 0.4648325639162333,
|
|
"learning_rate": 2.6699103422904494e-07,
|
|
"loss": 0.2329,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13395778834819794,
|
|
"step": 795,
|
|
"valid_targets_mean": 7350.9,
|
|
"valid_targets_min": 1899
|
|
},
|
|
{
|
|
"epoch": 6.725738396624473,
|
|
"grad_norm": 0.4397893931280253,
|
|
"learning_rate": 2.030288649936285e-07,
|
|
"loss": 0.2385,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10574311763048172,
|
|
"step": 800,
|
|
"valid_targets_mean": 7157.0,
|
|
"valid_targets_min": 850
|
|
},
|
|
{
|
|
"epoch": 6.767932489451477,
|
|
"grad_norm": 0.43270603582901673,
|
|
"learning_rate": 1.477735034208805e-07,
|
|
"loss": 0.2305,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12294603884220123,
|
|
"step": 805,
|
|
"valid_targets_mean": 8442.5,
|
|
"valid_targets_min": 3355
|
|
},
|
|
{
|
|
"epoch": 6.810126582278481,
|
|
"grad_norm": 0.4125142256044064,
|
|
"learning_rate": 1.0124925109725514e-07,
|
|
"loss": 0.2394,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14932481944561005,
|
|
"step": 810,
|
|
"valid_targets_mean": 9925.2,
|
|
"valid_targets_min": 8745
|
|
},
|
|
{
|
|
"epoch": 6.852320675105485,
|
|
"grad_norm": 0.41761981991702074,
|
|
"learning_rate": 6.347656962335524e-08,
|
|
"loss": 0.226,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09456837177276611,
|
|
"step": 815,
|
|
"valid_targets_mean": 7658.6,
|
|
"valid_targets_min": 4375
|
|
},
|
|
{
|
|
"epoch": 6.894514767932489,
|
|
"grad_norm": 0.5241537287811314,
|
|
"learning_rate": 3.447207161483279e-08,
|
|
"loss": 0.2253,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10600066184997559,
|
|
"step": 820,
|
|
"valid_targets_mean": 7051.8,
|
|
"valid_targets_min": 878
|
|
},
|
|
{
|
|
"epoch": 6.936708860759493,
|
|
"grad_norm": 0.42822827158756854,
|
|
"learning_rate": 1.424851339606903e-08,
|
|
"loss": 0.2264,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11232862621545792,
|
|
"step": 825,
|
|
"valid_targets_mean": 9165.9,
|
|
"valid_targets_min": 7143
|
|
},
|
|
{
|
|
"epoch": 6.978902953586498,
|
|
"grad_norm": 0.4390460209806349,
|
|
"learning_rate": 2.8147893898755605e-09,
|
|
"loss": 0.2299,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10721021145582199,
|
|
"step": 830,
|
|
"valid_targets_mean": 9302.9,
|
|
"valid_targets_min": 8063
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.22618243098258972,
|
|
"step": 833,
|
|
"total_flos": 6.212778952973353e+17,
|
|
"train_loss": 0.28838339830790105,
|
|
"train_runtime": 12171.8398,
|
|
"train_samples_per_second": 1.088,
|
|
"train_steps_per_second": 0.068,
|
|
"valid_targets_mean": 9237.2,
|
|
"valid_targets_min": 6486
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 833,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 6.212778952973353e+17,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|