Model: laion/Kimi-2.5-swesmith-r2egym-solved-maxeps-32k__Qwen3-8B Source: Original Platform
1335 lines
37 KiB
JSON
1335 lines
37 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 7.0,
|
|
"eval_steps": 500,
|
|
"global_step": 588,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.05952380952380952,
|
|
"grad_norm": 26.475136303319733,
|
|
"learning_rate": 2.7118644067796613e-06,
|
|
"loss": 0.8757,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.28182709217071533,
|
|
"step": 5,
|
|
"valid_targets_mean": 5259.6,
|
|
"valid_targets_min": 241
|
|
},
|
|
{
|
|
"epoch": 0.11904761904761904,
|
|
"grad_norm": 10.752364491117543,
|
|
"learning_rate": 6.1016949152542385e-06,
|
|
"loss": 0.7723,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.23036649823188782,
|
|
"step": 10,
|
|
"valid_targets_mean": 5471.6,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 0.17857142857142858,
|
|
"grad_norm": 2.0975582533943844,
|
|
"learning_rate": 9.491525423728815e-06,
|
|
"loss": 0.5988,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1777261197566986,
|
|
"step": 15,
|
|
"valid_targets_mean": 5108.2,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 0.23809523809523808,
|
|
"grad_norm": 1.4703223243635486,
|
|
"learning_rate": 1.288135593220339e-05,
|
|
"loss": 0.548,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1800306737422943,
|
|
"step": 20,
|
|
"valid_targets_mean": 5685.6,
|
|
"valid_targets_min": 102
|
|
},
|
|
{
|
|
"epoch": 0.2976190476190476,
|
|
"grad_norm": 0.8534483461368364,
|
|
"learning_rate": 1.6271186440677967e-05,
|
|
"loss": 0.5087,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14695829153060913,
|
|
"step": 25,
|
|
"valid_targets_mean": 4944.7,
|
|
"valid_targets_min": 2418
|
|
},
|
|
{
|
|
"epoch": 0.35714285714285715,
|
|
"grad_norm": 0.582534993586709,
|
|
"learning_rate": 1.9661016949152545e-05,
|
|
"loss": 0.4839,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.16712947189807892,
|
|
"step": 30,
|
|
"valid_targets_mean": 6107.1,
|
|
"valid_targets_min": 2487
|
|
},
|
|
{
|
|
"epoch": 0.4166666666666667,
|
|
"grad_norm": 0.49806474628335173,
|
|
"learning_rate": 2.3050847457627122e-05,
|
|
"loss": 0.4524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.15718674659729004,
|
|
"step": 35,
|
|
"valid_targets_mean": 6053.5,
|
|
"valid_targets_min": 439
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"grad_norm": 0.39977189555606873,
|
|
"learning_rate": 2.6440677966101696e-05,
|
|
"loss": 0.4284,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.13855797052383423,
|
|
"step": 40,
|
|
"valid_targets_mean": 5599.5,
|
|
"valid_targets_min": 2254
|
|
},
|
|
{
|
|
"epoch": 0.5357142857142857,
|
|
"grad_norm": 0.35501868807640347,
|
|
"learning_rate": 2.9830508474576274e-05,
|
|
"loss": 0.4083,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1349203884601593,
|
|
"step": 45,
|
|
"valid_targets_mean": 6183.1,
|
|
"valid_targets_min": 231
|
|
},
|
|
{
|
|
"epoch": 0.5952380952380952,
|
|
"grad_norm": 0.3120105713672939,
|
|
"learning_rate": 3.322033898305085e-05,
|
|
"loss": 0.3911,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.14311061799526215,
|
|
"step": 50,
|
|
"valid_targets_mean": 5855.2,
|
|
"valid_targets_min": 2956
|
|
},
|
|
{
|
|
"epoch": 0.6547619047619048,
|
|
"grad_norm": 0.2727710443190103,
|
|
"learning_rate": 3.6610169491525426e-05,
|
|
"loss": 0.3768,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.12037570774555206,
|
|
"step": 55,
|
|
"valid_targets_mean": 5236.8,
|
|
"valid_targets_min": 354
|
|
},
|
|
{
|
|
"epoch": 0.7142857142857143,
|
|
"grad_norm": 0.2757726260661749,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.3573,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11726392805576324,
|
|
"step": 60,
|
|
"valid_targets_mean": 5693.2,
|
|
"valid_targets_min": 229
|
|
},
|
|
{
|
|
"epoch": 0.7738095238095238,
|
|
"grad_norm": 0.2786546982169116,
|
|
"learning_rate": 3.9991183494162245e-05,
|
|
"loss": 0.3501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11834583431482315,
|
|
"step": 65,
|
|
"valid_targets_mean": 6120.8,
|
|
"valid_targets_min": 2344
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 0.23636721617724882,
|
|
"learning_rate": 3.996474174972647e-05,
|
|
"loss": 0.3417,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11998672783374786,
|
|
"step": 70,
|
|
"valid_targets_mean": 5844.7,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 0.8928571428571429,
|
|
"grad_norm": 0.24309351059305612,
|
|
"learning_rate": 3.9920698079072125e-05,
|
|
"loss": 0.3344,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10884048044681549,
|
|
"step": 75,
|
|
"valid_targets_mean": 5633.0,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"grad_norm": 0.2472743814168574,
|
|
"learning_rate": 3.9859091313327124e-05,
|
|
"loss": 0.3289,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09308268129825592,
|
|
"step": 80,
|
|
"valid_targets_mean": 5145.1,
|
|
"valid_targets_min": 2288
|
|
},
|
|
{
|
|
"epoch": 1.0119047619047619,
|
|
"grad_norm": 0.2635411800539745,
|
|
"learning_rate": 3.977997576813247e-05,
|
|
"loss": 0.3198,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.11220236867666245,
|
|
"step": 85,
|
|
"valid_targets_mean": 5678.7,
|
|
"valid_targets_min": 2563
|
|
},
|
|
{
|
|
"epoch": 1.0714285714285714,
|
|
"grad_norm": 0.2880444080728388,
|
|
"learning_rate": 3.968342119575477e-05,
|
|
"loss": 0.3123,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10120871663093567,
|
|
"step": 90,
|
|
"valid_targets_mean": 5122.5,
|
|
"valid_targets_min": 2218
|
|
},
|
|
{
|
|
"epoch": 1.130952380952381,
|
|
"grad_norm": 0.2710755850501796,
|
|
"learning_rate": 3.956951272358911e-05,
|
|
"loss": 0.3128,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10858584940433502,
|
|
"step": 95,
|
|
"valid_targets_mean": 5793.9,
|
|
"valid_targets_min": 2072
|
|
},
|
|
{
|
|
"epoch": 1.1904761904761905,
|
|
"grad_norm": 0.2559535005305801,
|
|
"learning_rate": 3.943835077910648e-05,
|
|
"loss": 0.3078,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08397121727466583,
|
|
"step": 100,
|
|
"valid_targets_mean": 4776.5,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 1.25,
|
|
"grad_norm": 0.2362436236599847,
|
|
"learning_rate": 3.9290051001311815e-05,
|
|
"loss": 0.3036,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10915631055831909,
|
|
"step": 105,
|
|
"valid_targets_mean": 6117.2,
|
|
"valid_targets_min": 1862
|
|
},
|
|
{
|
|
"epoch": 1.3095238095238095,
|
|
"grad_norm": 0.2852961689327156,
|
|
"learning_rate": 3.912474413879077e-05,
|
|
"loss": 0.3055,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09754365682601929,
|
|
"step": 110,
|
|
"valid_targets_mean": 5148.4,
|
|
"valid_targets_min": 436
|
|
},
|
|
{
|
|
"epoch": 1.369047619047619,
|
|
"grad_norm": 0.2794753115317547,
|
|
"learning_rate": 3.894257593443519e-05,
|
|
"loss": 0.2956,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10125661641359329,
|
|
"step": 115,
|
|
"valid_targets_mean": 6039.0,
|
|
"valid_targets_min": 494
|
|
},
|
|
{
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 0.26532864715105864,
|
|
"learning_rate": 3.874370699694878e-05,
|
|
"loss": 0.298,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10564189404249191,
|
|
"step": 120,
|
|
"valid_targets_mean": 6079.8,
|
|
"valid_targets_min": 471
|
|
},
|
|
{
|
|
"epoch": 1.4880952380952381,
|
|
"grad_norm": 0.29837003528349804,
|
|
"learning_rate": 3.8528312659246395e-05,
|
|
"loss": 0.2976,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09849759191274643,
|
|
"step": 125,
|
|
"valid_targets_mean": 5342.6,
|
|
"valid_targets_min": 1368
|
|
},
|
|
{
|
|
"epoch": 1.5476190476190477,
|
|
"grad_norm": 0.23646296987288887,
|
|
"learning_rate": 3.8296582823871596e-05,
|
|
"loss": 0.2975,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09054168313741684,
|
|
"step": 130,
|
|
"valid_targets_mean": 4966.0,
|
|
"valid_targets_min": 220
|
|
},
|
|
{
|
|
"epoch": 1.6071428571428572,
|
|
"grad_norm": 0.2618136463189114,
|
|
"learning_rate": 3.804872179556902e-05,
|
|
"loss": 0.2944,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09321890771389008,
|
|
"step": 135,
|
|
"valid_targets_mean": 4762.0,
|
|
"valid_targets_min": 165
|
|
},
|
|
{
|
|
"epoch": 1.6666666666666665,
|
|
"grad_norm": 0.2708942100931431,
|
|
"learning_rate": 3.778494810115896e-05,
|
|
"loss": 0.2938,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1079484075307846,
|
|
"step": 140,
|
|
"valid_targets_mean": 5604.2,
|
|
"valid_targets_min": 451
|
|
},
|
|
{
|
|
"epoch": 1.7261904761904763,
|
|
"grad_norm": 0.2789099088534343,
|
|
"learning_rate": 3.750549429687309e-05,
|
|
"loss": 0.2933,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08427592366933823,
|
|
"step": 145,
|
|
"valid_targets_mean": 5146.2,
|
|
"valid_targets_min": 427
|
|
},
|
|
{
|
|
"epoch": 1.7857142857142856,
|
|
"grad_norm": 0.2678992582042396,
|
|
"learning_rate": 3.721060676332109e-05,
|
|
"loss": 0.2962,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10124705731868744,
|
|
"step": 150,
|
|
"valid_targets_mean": 5797.0,
|
|
"valid_targets_min": 389
|
|
},
|
|
{
|
|
"epoch": 1.8452380952380953,
|
|
"grad_norm": 0.2684299288852733,
|
|
"learning_rate": 3.6900545488269066e-05,
|
|
"loss": 0.2894,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0845484584569931,
|
|
"step": 155,
|
|
"valid_targets_mean": 5063.4,
|
|
"valid_targets_min": 445
|
|
},
|
|
{
|
|
"epoch": 1.9047619047619047,
|
|
"grad_norm": 0.2833413534409504,
|
|
"learning_rate": 3.657558383742117e-05,
|
|
"loss": 0.2919,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09527384489774704,
|
|
"step": 160,
|
|
"valid_targets_mean": 5275.2,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 1.9642857142857144,
|
|
"grad_norm": 0.28986568017083997,
|
|
"learning_rate": 3.6236008313406594e-05,
|
|
"loss": 0.2871,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10269998013973236,
|
|
"step": 165,
|
|
"valid_targets_mean": 5881.0,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 2.0238095238095237,
|
|
"grad_norm": 0.27082985663787124,
|
|
"learning_rate": 3.58821183031843e-05,
|
|
"loss": 0.2813,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09580333530902863,
|
|
"step": 170,
|
|
"valid_targets_mean": 5845.4,
|
|
"valid_targets_min": 1920
|
|
},
|
|
{
|
|
"epoch": 2.0833333333333335,
|
|
"grad_norm": 0.27912438131905437,
|
|
"learning_rate": 3.55142258140884e-05,
|
|
"loss": 0.2816,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0991968959569931,
|
|
"step": 175,
|
|
"valid_targets_mean": 6037.7,
|
|
"valid_targets_min": 2878
|
|
},
|
|
{
|
|
"epoch": 2.142857142857143,
|
|
"grad_norm": 0.2495296583091803,
|
|
"learning_rate": 3.513265519874668e-05,
|
|
"loss": 0.2779,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0991060882806778,
|
|
"step": 180,
|
|
"valid_targets_mean": 5959.1,
|
|
"valid_targets_min": 470
|
|
},
|
|
{
|
|
"epoch": 2.2023809523809526,
|
|
"grad_norm": 0.24797503017191885,
|
|
"learning_rate": 3.473774286911489e-05,
|
|
"loss": 0.2784,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10953380167484283,
|
|
"step": 185,
|
|
"valid_targets_mean": 6153.2,
|
|
"valid_targets_min": 3118
|
|
},
|
|
{
|
|
"epoch": 2.261904761904762,
|
|
"grad_norm": 0.2635797062915291,
|
|
"learning_rate": 3.432983699987901e-05,
|
|
"loss": 0.2785,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07885843515396118,
|
|
"step": 190,
|
|
"valid_targets_mean": 4745.1,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 2.3214285714285716,
|
|
"grad_norm": 0.27743273802507495,
|
|
"learning_rate": 3.390929722148677e-05,
|
|
"loss": 0.2786,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08541278541088104,
|
|
"step": 195,
|
|
"valid_targets_mean": 5393.5,
|
|
"valid_targets_min": 2005
|
|
},
|
|
{
|
|
"epoch": 2.380952380952381,
|
|
"grad_norm": 0.27104625586587555,
|
|
"learning_rate": 3.3476494303079285e-05,
|
|
"loss": 0.2778,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08726505935192108,
|
|
"step": 200,
|
|
"valid_targets_mean": 5373.0,
|
|
"valid_targets_min": 257
|
|
},
|
|
{
|
|
"epoch": 2.4404761904761907,
|
|
"grad_norm": 0.25680629085820356,
|
|
"learning_rate": 3.303180982560224e-05,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08836235105991364,
|
|
"step": 205,
|
|
"valid_targets_mean": 5117.2,
|
|
"valid_targets_min": 361
|
|
},
|
|
{
|
|
"epoch": 2.5,
|
|
"grad_norm": 0.2722466936160436,
|
|
"learning_rate": 3.2575635845384787e-05,
|
|
"loss": 0.2746,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09811330586671829,
|
|
"step": 210,
|
|
"valid_targets_mean": 5944.3,
|
|
"valid_targets_min": 2473
|
|
},
|
|
{
|
|
"epoch": 2.5595238095238093,
|
|
"grad_norm": 0.2751403433864084,
|
|
"learning_rate": 3.21083745484829e-05,
|
|
"loss": 0.2707,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07897627353668213,
|
|
"step": 215,
|
|
"valid_targets_mean": 4240.3,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 2.619047619047619,
|
|
"grad_norm": 0.2862317925607407,
|
|
"learning_rate": 3.1630437896091756e-05,
|
|
"loss": 0.2792,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08746589720249176,
|
|
"step": 220,
|
|
"valid_targets_mean": 5690.9,
|
|
"valid_targets_min": 440
|
|
},
|
|
{
|
|
"epoch": 2.678571428571429,
|
|
"grad_norm": 0.2813165779096547,
|
|
"learning_rate": 3.114224726133996e-05,
|
|
"loss": 0.273,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0828898623585701,
|
|
"step": 225,
|
|
"valid_targets_mean": 4970.9,
|
|
"valid_targets_min": 1760
|
|
},
|
|
{
|
|
"epoch": 2.738095238095238,
|
|
"grad_norm": 0.27056484197253816,
|
|
"learning_rate": 3.0644233057785615e-05,
|
|
"loss": 0.2706,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08945001661777496,
|
|
"step": 230,
|
|
"valid_targets_mean": 5406.1,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 2.7976190476190474,
|
|
"grad_norm": 0.2685779777737954,
|
|
"learning_rate": 3.0136834359942032e-05,
|
|
"loss": 0.2684,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08828261494636536,
|
|
"step": 235,
|
|
"valid_targets_mean": 5503.7,
|
|
"valid_targets_min": 368
|
|
},
|
|
{
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 0.2732406218897489,
|
|
"learning_rate": 2.9620498516167356e-05,
|
|
"loss": 0.2717,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07858332991600037,
|
|
"step": 240,
|
|
"valid_targets_mean": 5290.2,
|
|
"valid_targets_min": 2127
|
|
},
|
|
{
|
|
"epoch": 2.9166666666666665,
|
|
"grad_norm": 0.2399472243045982,
|
|
"learning_rate": 2.9095680754259687e-05,
|
|
"loss": 0.2677,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08259420096874237,
|
|
"step": 245,
|
|
"valid_targets_mean": 4813.6,
|
|
"valid_targets_min": 409
|
|
},
|
|
{
|
|
"epoch": 2.9761904761904763,
|
|
"grad_norm": 0.24882382594549618,
|
|
"learning_rate": 2.8562843780105182e-05,
|
|
"loss": 0.2702,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10204588621854782,
|
|
"step": 250,
|
|
"valid_targets_mean": 6258.4,
|
|
"valid_targets_min": 1978
|
|
},
|
|
{
|
|
"epoch": 3.0357142857142856,
|
|
"grad_norm": 0.24889920345299785,
|
|
"learning_rate": 2.8022457369733165e-05,
|
|
"loss": 0.2662,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08854080736637115,
|
|
"step": 255,
|
|
"valid_targets_mean": 6058.7,
|
|
"valid_targets_min": 437
|
|
},
|
|
{
|
|
"epoch": 3.0952380952380953,
|
|
"grad_norm": 0.26589186213113003,
|
|
"learning_rate": 2.7474997955137803e-05,
|
|
"loss": 0.2625,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07796312123537064,
|
|
"step": 260,
|
|
"valid_targets_mean": 5013.2,
|
|
"valid_targets_min": 416
|
|
},
|
|
{
|
|
"epoch": 3.1547619047619047,
|
|
"grad_norm": 0.2892003595844532,
|
|
"learning_rate": 2.6920948204231573e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0770723819732666,
|
|
"step": 265,
|
|
"valid_targets_mean": 4645.8,
|
|
"valid_targets_min": 424
|
|
},
|
|
{
|
|
"epoch": 3.2142857142857144,
|
|
"grad_norm": 0.29019028159438703,
|
|
"learning_rate": 2.636079659530079e-05,
|
|
"loss": 0.265,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08780994266271591,
|
|
"step": 270,
|
|
"valid_targets_mean": 5075.2,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 3.2738095238095237,
|
|
"grad_norm": 0.25707211946258385,
|
|
"learning_rate": 2.5795036986338477e-05,
|
|
"loss": 0.2656,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07921002805233002,
|
|
"step": 275,
|
|
"valid_targets_mean": 4983.1,
|
|
"valid_targets_min": 259
|
|
},
|
|
{
|
|
"epoch": 3.3333333333333335,
|
|
"grad_norm": 0.2393833152778856,
|
|
"learning_rate": 2.522416817963416e-05,
|
|
"loss": 0.2615,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08294697105884552,
|
|
"step": 280,
|
|
"valid_targets_mean": 5771.5,
|
|
"valid_targets_min": 3118
|
|
},
|
|
{
|
|
"epoch": 3.392857142857143,
|
|
"grad_norm": 0.25514689276071223,
|
|
"learning_rate": 2.464869348200452e-05,
|
|
"loss": 0.2592,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09682682156562805,
|
|
"step": 285,
|
|
"valid_targets_mean": 5856.5,
|
|
"valid_targets_min": 487
|
|
},
|
|
{
|
|
"epoch": 3.4523809523809526,
|
|
"grad_norm": 0.25666137663935856,
|
|
"learning_rate": 2.4069120261052682e-05,
|
|
"loss": 0.2658,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.10615084320306778,
|
|
"step": 290,
|
|
"valid_targets_mean": 6388.9,
|
|
"valid_targets_min": 2484
|
|
},
|
|
{
|
|
"epoch": 3.511904761904762,
|
|
"grad_norm": 0.24250690776748496,
|
|
"learning_rate": 2.3485959497847223e-05,
|
|
"loss": 0.2583,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07441714406013489,
|
|
"step": 295,
|
|
"valid_targets_mean": 5149.7,
|
|
"valid_targets_min": 396
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 0.2605704598646807,
|
|
"learning_rate": 2.2899725336415468e-05,
|
|
"loss": 0.2619,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08115138858556747,
|
|
"step": 300,
|
|
"valid_targets_mean": 5677.9,
|
|
"valid_targets_min": 2348
|
|
},
|
|
{
|
|
"epoch": 3.630952380952381,
|
|
"grad_norm": 0.24661982746609362,
|
|
"learning_rate": 2.2310934630448076e-05,
|
|
"loss": 0.2598,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08594545722007751,
|
|
"step": 305,
|
|
"valid_targets_mean": 5679.9,
|
|
"valid_targets_min": 1757
|
|
},
|
|
{
|
|
"epoch": 3.6904761904761907,
|
|
"grad_norm": 0.25915778890419666,
|
|
"learning_rate": 2.1720106487614678e-05,
|
|
"loss": 0.2564,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09154875576496124,
|
|
"step": 310,
|
|
"valid_targets_mean": 5870.5,
|
|
"valid_targets_min": 2366
|
|
},
|
|
{
|
|
"epoch": 3.75,
|
|
"grad_norm": 0.2517589506891814,
|
|
"learning_rate": 2.112776181189232e-05,
|
|
"loss": 0.2622,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08415160328149796,
|
|
"step": 315,
|
|
"valid_targets_mean": 5251.9,
|
|
"valid_targets_min": 401
|
|
},
|
|
{
|
|
"epoch": 3.8095238095238093,
|
|
"grad_norm": 0.2700239209259606,
|
|
"learning_rate": 2.0534422844310144e-05,
|
|
"loss": 0.2644,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07980450242757797,
|
|
"step": 320,
|
|
"valid_targets_mean": 4789.0,
|
|
"valid_targets_min": 321
|
|
},
|
|
{
|
|
"epoch": 3.869047619047619,
|
|
"grad_norm": 0.2549269120465629,
|
|
"learning_rate": 1.9940612702515292e-05,
|
|
"loss": 0.2574,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.1039179265499115,
|
|
"step": 325,
|
|
"valid_targets_mean": 6290.8,
|
|
"valid_targets_min": 515
|
|
},
|
|
{
|
|
"epoch": 3.928571428571429,
|
|
"grad_norm": 0.28891001950049783,
|
|
"learning_rate": 1.934685491956595e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08675411343574524,
|
|
"step": 330,
|
|
"valid_targets_mean": 5501.4,
|
|
"valid_targets_min": 414
|
|
},
|
|
{
|
|
"epoch": 3.988095238095238,
|
|
"grad_norm": 0.25893042450210946,
|
|
"learning_rate": 1.8753672982358055e-05,
|
|
"loss": 0.2617,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08645622432231903,
|
|
"step": 335,
|
|
"valid_targets_mean": 5373.1,
|
|
"valid_targets_min": 2838
|
|
},
|
|
{
|
|
"epoch": 4.0476190476190474,
|
|
"grad_norm": 0.25042985945546514,
|
|
"learning_rate": 1.8161589870092842e-05,
|
|
"loss": 0.2585,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08585283160209656,
|
|
"step": 340,
|
|
"valid_targets_mean": 5608.5,
|
|
"valid_targets_min": 421
|
|
},
|
|
{
|
|
"epoch": 4.107142857142857,
|
|
"grad_norm": 0.26603784455504254,
|
|
"learning_rate": 1.7571127593191877e-05,
|
|
"loss": 0.252,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08927032351493835,
|
|
"step": 345,
|
|
"valid_targets_mean": 5348.3,
|
|
"valid_targets_min": 435
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 0.2808689664261119,
|
|
"learning_rate": 1.6982806733066303e-05,
|
|
"loss": 0.2544,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08572648465633392,
|
|
"step": 350,
|
|
"valid_targets_mean": 5795.5,
|
|
"valid_targets_min": 1835
|
|
},
|
|
{
|
|
"epoch": 4.226190476190476,
|
|
"grad_norm": 0.2584899797868476,
|
|
"learning_rate": 1.639714598314588e-05,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07797005027532578,
|
|
"step": 355,
|
|
"valid_targets_mean": 4981.5,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 4.285714285714286,
|
|
"grad_norm": 0.2485126801540494,
|
|
"learning_rate": 1.5814661691572673e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08520189672708511,
|
|
"step": 360,
|
|
"valid_targets_mean": 6290.2,
|
|
"valid_targets_min": 1974
|
|
},
|
|
{
|
|
"epoch": 4.345238095238095,
|
|
"grad_norm": 0.2513271250286924,
|
|
"learning_rate": 1.5235867405962397e-05,
|
|
"loss": 0.2562,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09782940149307251,
|
|
"step": 365,
|
|
"valid_targets_mean": 6345.8,
|
|
"valid_targets_min": 335
|
|
},
|
|
{
|
|
"epoch": 4.404761904761905,
|
|
"grad_norm": 0.250961969245666,
|
|
"learning_rate": 1.4661273420634836e-05,
|
|
"loss": 0.2557,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09486376494169235,
|
|
"step": 370,
|
|
"valid_targets_mean": 5938.4,
|
|
"valid_targets_min": 2327
|
|
},
|
|
{
|
|
"epoch": 4.464285714285714,
|
|
"grad_norm": 0.24731218596658003,
|
|
"learning_rate": 1.4091386326712599e-05,
|
|
"loss": 0.2534,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0861523300409317,
|
|
"step": 375,
|
|
"valid_targets_mean": 5634.8,
|
|
"valid_targets_min": 349
|
|
},
|
|
{
|
|
"epoch": 4.523809523809524,
|
|
"grad_norm": 0.24844990717404133,
|
|
"learning_rate": 1.3526708565484726e-05,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08229143917560577,
|
|
"step": 380,
|
|
"valid_targets_mean": 5422.2,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 4.583333333333333,
|
|
"grad_norm": 0.2333334277350641,
|
|
"learning_rate": 1.2967737985429041e-05,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08357102423906326,
|
|
"step": 385,
|
|
"valid_targets_mean": 5235.4,
|
|
"valid_targets_min": 1760
|
|
},
|
|
{
|
|
"epoch": 4.642857142857143,
|
|
"grad_norm": 0.25215688549131215,
|
|
"learning_rate": 1.2414967403283776e-05,
|
|
"loss": 0.2491,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08776518702507019,
|
|
"step": 390,
|
|
"valid_targets_mean": 5544.2,
|
|
"valid_targets_min": 464
|
|
},
|
|
{
|
|
"epoch": 4.7023809523809526,
|
|
"grad_norm": 0.2293231506390837,
|
|
"learning_rate": 1.1868884169555353e-05,
|
|
"loss": 0.2525,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07636909186840057,
|
|
"step": 395,
|
|
"valid_targets_mean": 5233.3,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 4.761904761904762,
|
|
"grad_norm": 0.2664616026061766,
|
|
"learning_rate": 1.1329969738845605e-05,
|
|
"loss": 0.258,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08857310563325882,
|
|
"step": 400,
|
|
"valid_targets_mean": 5488.7,
|
|
"valid_targets_min": 2368
|
|
},
|
|
{
|
|
"epoch": 4.821428571428571,
|
|
"grad_norm": 0.22965867799638506,
|
|
"learning_rate": 1.0798699245376959e-05,
|
|
"loss": 0.2578,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09623508155345917,
|
|
"step": 405,
|
|
"valid_targets_mean": 6153.3,
|
|
"valid_targets_min": 2642
|
|
},
|
|
{
|
|
"epoch": 4.880952380952381,
|
|
"grad_norm": 0.23714434789026348,
|
|
"learning_rate": 1.0275541084090127e-05,
|
|
"loss": 0.2492,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09711508452892303,
|
|
"step": 410,
|
|
"valid_targets_mean": 6321.8,
|
|
"valid_targets_min": 2856
|
|
},
|
|
{
|
|
"epoch": 4.940476190476191,
|
|
"grad_norm": 0.2335193268265453,
|
|
"learning_rate": 9.760956497683412e-06,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08174271881580353,
|
|
"step": 415,
|
|
"valid_targets_mean": 5300.7,
|
|
"valid_targets_min": 222
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"grad_norm": 0.24720488171742358,
|
|
"learning_rate": 9.255399169957823e-06,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09777113795280457,
|
|
"step": 420,
|
|
"valid_targets_mean": 5800.2,
|
|
"valid_targets_min": 2100
|
|
},
|
|
{
|
|
"epoch": 5.059523809523809,
|
|
"grad_norm": 0.24642098972509158,
|
|
"learning_rate": 8.759314825826486e-06,
|
|
"loss": 0.2524,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08720912039279938,
|
|
"step": 425,
|
|
"valid_targets_mean": 5300.3,
|
|
"valid_targets_min": 1875
|
|
},
|
|
{
|
|
"epoch": 5.119047619047619,
|
|
"grad_norm": 0.2571302114424818,
|
|
"learning_rate": 8.273140838341003e-06,
|
|
"loss": 0.2535,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07905574142932892,
|
|
"step": 430,
|
|
"valid_targets_mean": 5588.5,
|
|
"valid_targets_min": 355
|
|
},
|
|
{
|
|
"epoch": 5.178571428571429,
|
|
"grad_norm": 0.242878252474619,
|
|
"learning_rate": 7.797305843081255e-06,
|
|
"loss": 0.2555,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07647094130516052,
|
|
"step": 435,
|
|
"valid_targets_mean": 5138.4,
|
|
"valid_targets_min": 449
|
|
},
|
|
{
|
|
"epoch": 5.238095238095238,
|
|
"grad_norm": 0.23526010582884302,
|
|
"learning_rate": 7.332229360248597e-06,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07705177366733551,
|
|
"step": 440,
|
|
"valid_targets_mean": 5300.1,
|
|
"valid_targets_min": 316
|
|
},
|
|
{
|
|
"epoch": 5.2976190476190474,
|
|
"grad_norm": 0.25015849653867017,
|
|
"learning_rate": 6.87832142479562e-06,
|
|
"loss": 0.2473,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07398971170186996,
|
|
"step": 445,
|
|
"valid_targets_mean": 4832.6,
|
|
"valid_targets_min": 386
|
|
},
|
|
{
|
|
"epoch": 5.357142857142857,
|
|
"grad_norm": 0.2378763042427905,
|
|
"learning_rate": 6.4359822249185934e-06,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08934387564659119,
|
|
"step": 450,
|
|
"valid_targets_mean": 5822.2,
|
|
"valid_targets_min": 1976
|
|
},
|
|
{
|
|
"epoch": 5.416666666666667,
|
|
"grad_norm": 0.2205842931707241,
|
|
"learning_rate": 6.005601749231318e-06,
|
|
"loss": 0.2495,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08240607380867004,
|
|
"step": 455,
|
|
"valid_targets_mean": 5815.5,
|
|
"valid_targets_min": 2105
|
|
},
|
|
{
|
|
"epoch": 5.476190476190476,
|
|
"grad_norm": 0.23001451637384798,
|
|
"learning_rate": 5.587559442931429e-06,
|
|
"loss": 0.2478,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09403956681489944,
|
|
"step": 460,
|
|
"valid_targets_mean": 5927.9,
|
|
"valid_targets_min": 1898
|
|
},
|
|
{
|
|
"epoch": 5.535714285714286,
|
|
"grad_norm": 0.2457826876030719,
|
|
"learning_rate": 5.18222387326232e-06,
|
|
"loss": 0.2451,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07478703558444977,
|
|
"step": 465,
|
|
"valid_targets_mean": 5302.4,
|
|
"valid_targets_min": 2446
|
|
},
|
|
{
|
|
"epoch": 5.595238095238095,
|
|
"grad_norm": 0.23812933696457267,
|
|
"learning_rate": 4.7899524045656186e-06,
|
|
"loss": 0.2456,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07499450445175171,
|
|
"step": 470,
|
|
"valid_targets_mean": 5108.1,
|
|
"valid_targets_min": 429
|
|
},
|
|
{
|
|
"epoch": 5.654761904761905,
|
|
"grad_norm": 0.2369372815448469,
|
|
"learning_rate": 4.411090883210684e-06,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07033229619264603,
|
|
"step": 475,
|
|
"valid_targets_mean": 5013.0,
|
|
"valid_targets_min": 576
|
|
},
|
|
{
|
|
"epoch": 5.714285714285714,
|
|
"grad_norm": 0.23807774857438638,
|
|
"learning_rate": 4.0459733326790055e-06,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08270242065191269,
|
|
"step": 480,
|
|
"valid_targets_mean": 5699.3,
|
|
"valid_targets_min": 457
|
|
},
|
|
{
|
|
"epoch": 5.773809523809524,
|
|
"grad_norm": 0.24524035384617784,
|
|
"learning_rate": 3.6949216590721506e-06,
|
|
"loss": 0.2523,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08760789036750793,
|
|
"step": 485,
|
|
"valid_targets_mean": 5810.1,
|
|
"valid_targets_min": 2306
|
|
},
|
|
{
|
|
"epoch": 5.833333333333333,
|
|
"grad_norm": 0.251686432685376,
|
|
"learning_rate": 3.3582453673030923e-06,
|
|
"loss": 0.2447,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08097212016582489,
|
|
"step": 490,
|
|
"valid_targets_mean": 4964.3,
|
|
"valid_targets_min": 319
|
|
},
|
|
{
|
|
"epoch": 5.892857142857143,
|
|
"grad_norm": 0.2235799505428903,
|
|
"learning_rate": 3.0362412882210156e-06,
|
|
"loss": 0.2522,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08248492330312729,
|
|
"step": 495,
|
|
"valid_targets_mean": 5602.2,
|
|
"valid_targets_min": 2422
|
|
},
|
|
{
|
|
"epoch": 5.9523809523809526,
|
|
"grad_norm": 0.23565923364897154,
|
|
"learning_rate": 2.72919331691021e-06,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07904832065105438,
|
|
"step": 500,
|
|
"valid_targets_mean": 5078.7,
|
|
"valid_targets_min": 426
|
|
},
|
|
{
|
|
"epoch": 6.011904761904762,
|
|
"grad_norm": 0.23856362236717238,
|
|
"learning_rate": 2.437372162393834e-06,
|
|
"loss": 0.2533,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09080533683300018,
|
|
"step": 505,
|
|
"valid_targets_mean": 5782.2,
|
|
"valid_targets_min": 376
|
|
},
|
|
{
|
|
"epoch": 6.071428571428571,
|
|
"grad_norm": 0.23251648798736518,
|
|
"learning_rate": 2.16103510896313e-06,
|
|
"loss": 0.2501,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08888229727745056,
|
|
"step": 510,
|
|
"valid_targets_mean": 6173.2,
|
|
"valid_targets_min": 1828
|
|
},
|
|
{
|
|
"epoch": 6.130952380952381,
|
|
"grad_norm": 0.2277877587137689,
|
|
"learning_rate": 1.9004257893425726e-06,
|
|
"loss": 0.2506,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08594680577516556,
|
|
"step": 515,
|
|
"valid_targets_mean": 5924.8,
|
|
"valid_targets_min": 311
|
|
},
|
|
{
|
|
"epoch": 6.190476190476191,
|
|
"grad_norm": 0.234192139822292,
|
|
"learning_rate": 1.6557739698909436e-06,
|
|
"loss": 0.2413,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0754304900765419,
|
|
"step": 520,
|
|
"valid_targets_mean": 4934.6,
|
|
"valid_targets_min": 290
|
|
},
|
|
{
|
|
"epoch": 6.25,
|
|
"grad_norm": 0.22373772258265975,
|
|
"learning_rate": 1.4272953480276774e-06,
|
|
"loss": 0.2479,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08419759571552277,
|
|
"step": 525,
|
|
"valid_targets_mean": 6025.2,
|
|
"valid_targets_min": 1811
|
|
},
|
|
{
|
|
"epoch": 6.309523809523809,
|
|
"grad_norm": 0.23039419023997973,
|
|
"learning_rate": 1.215191362063124e-06,
|
|
"loss": 0.2469,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07240813970565796,
|
|
"step": 530,
|
|
"valid_targets_mean": 4975.7,
|
|
"valid_targets_min": 269
|
|
},
|
|
{
|
|
"epoch": 6.369047619047619,
|
|
"grad_norm": 0.22205982567688556,
|
|
"learning_rate": 1.0196490136003322e-06,
|
|
"loss": 0.2454,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07454642653465271,
|
|
"step": 535,
|
|
"valid_targets_mean": 5364.7,
|
|
"valid_targets_min": 367
|
|
},
|
|
{
|
|
"epoch": 6.428571428571429,
|
|
"grad_norm": 0.25426169519831565,
|
|
"learning_rate": 8.408407026649778e-07,
|
|
"loss": 0.2505,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0795418918132782,
|
|
"step": 540,
|
|
"valid_targets_mean": 5291.6,
|
|
"valid_targets_min": 264
|
|
},
|
|
{
|
|
"epoch": 6.488095238095238,
|
|
"grad_norm": 0.22890244722184025,
|
|
"learning_rate": 6.789240757087823e-07,
|
|
"loss": 0.2462,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08053529262542725,
|
|
"step": 545,
|
|
"valid_targets_mean": 5164.7,
|
|
"valid_targets_min": 434
|
|
},
|
|
{
|
|
"epoch": 6.5476190476190474,
|
|
"grad_norm": 0.2258429317576449,
|
|
"learning_rate": 5.340418866204177e-07,
|
|
"loss": 0.2434,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08423105627298355,
|
|
"step": 550,
|
|
"valid_targets_mean": 5882.1,
|
|
"valid_targets_min": 1985
|
|
},
|
|
{
|
|
"epoch": 6.607142857142857,
|
|
"grad_norm": 0.2308750096320961,
|
|
"learning_rate": 4.063218708664751e-07,
|
|
"loss": 0.2513,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09404976665973663,
|
|
"step": 555,
|
|
"valid_targets_mean": 6039.2,
|
|
"valid_targets_min": 2171
|
|
},
|
|
{
|
|
"epoch": 6.666666666666667,
|
|
"grad_norm": 0.30684656384369974,
|
|
"learning_rate": 2.9587663287340864e-07,
|
|
"loss": 0.2463,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07616351544857025,
|
|
"step": 560,
|
|
"valid_targets_mean": 5435.8,
|
|
"valid_targets_min": 509
|
|
},
|
|
{
|
|
"epoch": 6.726190476190476,
|
|
"grad_norm": 0.24369553807123656,
|
|
"learning_rate": 2.0280354674976576e-07,
|
|
"loss": 0.2457,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08108322322368622,
|
|
"step": 565,
|
|
"valid_targets_mean": 5453.7,
|
|
"valid_targets_min": 308
|
|
},
|
|
{
|
|
"epoch": 6.785714285714286,
|
|
"grad_norm": 0.22143496142468144,
|
|
"learning_rate": 1.2718467043626448e-07,
|
|
"loss": 0.2486,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08642816543579102,
|
|
"step": 570,
|
|
"valid_targets_mean": 5529.3,
|
|
"valid_targets_min": 168
|
|
},
|
|
{
|
|
"epoch": 6.845238095238095,
|
|
"grad_norm": 0.22289015044632118,
|
|
"learning_rate": 6.90866733593465e-08,
|
|
"loss": 0.2464,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.07365033030509949,
|
|
"step": 575,
|
|
"valid_targets_mean": 5419.3,
|
|
"valid_targets_min": 327
|
|
},
|
|
{
|
|
"epoch": 6.904761904761905,
|
|
"grad_norm": 0.24055593132796596,
|
|
"learning_rate": 2.856077765205356e-08,
|
|
"loss": 0.2489,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.09108748286962509,
|
|
"step": 580,
|
|
"valid_targets_mean": 5626.8,
|
|
"valid_targets_min": 1730
|
|
},
|
|
{
|
|
"epoch": 6.964285714285714,
|
|
"grad_norm": 0.23554247965669858,
|
|
"learning_rate": 5.642712993993993e-09,
|
|
"loss": 0.2481,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.0795075073838234,
|
|
"step": 585,
|
|
"valid_targets_mean": 5410.0,
|
|
"valid_targets_min": 423
|
|
},
|
|
{
|
|
"epoch": 7.0,
|
|
"loss_nan_ranks": 0,
|
|
"loss_rank_avg": 0.08444557338953018,
|
|
"step": 588,
|
|
"total_flos": 2.4214382862254735e+18,
|
|
"train_loss": 0.2928499853124424,
|
|
"train_runtime": 13980.8507,
|
|
"train_samples_per_second": 4.035,
|
|
"train_steps_per_second": 0.042,
|
|
"valid_targets_mean": 5498.0,
|
|
"valid_targets_min": 368
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 588,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 7,
|
|
"save_steps": 300,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 2.4214382862254735e+18,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|