Files
mix329_tillend_bc329/trainer_state.json
ModelHub XC 8349393985 初始化项目,由ModelHub XC社区提供模型
Model: jiayicheng/mix329_tillend_bc329
Source: Original Platform
2026-05-09 00:05:39 +08:00

675 lines
18 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 287,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12195121951219512,
"grad_norm": 11.268886282590001,
"learning_rate": 5.517241379310345e-06,
"loss": 0.7129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08619208633899689,
"step": 5,
"valid_targets_mean": 1809.8,
"valid_targets_min": 413
},
{
"epoch": 0.24390243902439024,
"grad_norm": 6.618387158921114,
"learning_rate": 1.2413793103448277e-05,
"loss": 0.5949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15227359533309937,
"step": 10,
"valid_targets_mean": 1454.2,
"valid_targets_min": 1048
},
{
"epoch": 0.36585365853658536,
"grad_norm": 1.1697598603814097,
"learning_rate": 1.931034482758621e-05,
"loss": 0.3616,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03705377131700516,
"step": 15,
"valid_targets_mean": 1068.8,
"valid_targets_min": 603
},
{
"epoch": 0.4878048780487805,
"grad_norm": 1.0542992580910668,
"learning_rate": 2.620689655172414e-05,
"loss": 0.3145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05863872170448303,
"step": 20,
"valid_targets_mean": 1012.8,
"valid_targets_min": 786
},
{
"epoch": 0.6097560975609756,
"grad_norm": 0.8906250398426856,
"learning_rate": 3.310344827586207e-05,
"loss": 0.3165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1044623851776123,
"step": 25,
"valid_targets_mean": 3050.0,
"valid_targets_min": 1178
},
{
"epoch": 0.7317073170731707,
"grad_norm": 0.7873917512747328,
"learning_rate": 4e-05,
"loss": 0.2592,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07892066985368729,
"step": 30,
"valid_targets_mean": 2248.5,
"valid_targets_min": 1487
},
{
"epoch": 0.8536585365853658,
"grad_norm": 0.7365758820610112,
"learning_rate": 3.99629433475729e-05,
"loss": 0.2519,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05776943266391754,
"step": 35,
"valid_targets_mean": 2209.0,
"valid_targets_min": 853
},
{
"epoch": 0.975609756097561,
"grad_norm": 0.9982728088095962,
"learning_rate": 3.985191070984053e-05,
"loss": 0.2633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08143679797649384,
"step": 40,
"valid_targets_mean": 1733.5,
"valid_targets_min": 542
},
{
"epoch": 1.0975609756097562,
"grad_norm": 0.6049830734772758,
"learning_rate": 3.966731353658932e-05,
"loss": 0.2297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05846400931477547,
"step": 45,
"valid_targets_mean": 2682.8,
"valid_targets_min": 1287
},
{
"epoch": 1.2195121951219512,
"grad_norm": 0.6718448910202545,
"learning_rate": 3.940983588314811e-05,
"loss": 0.2282,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05738037824630737,
"step": 50,
"valid_targets_mean": 2266.2,
"valid_targets_min": 917
},
{
"epoch": 1.3414634146341464,
"grad_norm": 0.7687467547629783,
"learning_rate": 3.908043187550802e-05,
"loss": 0.2176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.034274592995643616,
"step": 55,
"valid_targets_mean": 1044.0,
"valid_targets_min": 912
},
{
"epoch": 1.4634146341463414,
"grad_norm": 0.4409997756684539,
"learning_rate": 3.868032217465097e-05,
"loss": 0.2294,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06288576126098633,
"step": 60,
"valid_targets_mean": 7464.2,
"valid_targets_min": 4233
},
{
"epoch": 1.5853658536585367,
"grad_norm": 0.6669183491635846,
"learning_rate": 3.821098945318869e-05,
"loss": 0.2184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.041596993803977966,
"step": 65,
"valid_targets_mean": 2462.2,
"valid_targets_min": 603
},
{
"epoch": 1.7073170731707317,
"grad_norm": 0.833951005600001,
"learning_rate": 3.767417290107439e-05,
"loss": 0.2229,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0828239768743515,
"step": 70,
"valid_targets_mean": 1676.0,
"valid_targets_min": 960
},
{
"epoch": 1.8292682926829267,
"grad_norm": 0.3827765345675253,
"learning_rate": 3.7071861780746934e-05,
"loss": 0.1918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03231481462717056,
"step": 75,
"valid_targets_mean": 6333.8,
"valid_targets_min": 1529
},
{
"epoch": 1.951219512195122,
"grad_norm": 0.405420209692415,
"learning_rate": 3.640628805559022e-05,
"loss": 0.1834,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.021378157660365105,
"step": 80,
"valid_targets_mean": 2585.5,
"valid_targets_min": 894
},
{
"epoch": 2.073170731707317,
"grad_norm": 0.6706457928261834,
"learning_rate": 3.567991811902403e-05,
"loss": 0.1949,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.029131846502423286,
"step": 85,
"valid_targets_mean": 1901.5,
"valid_targets_min": 758
},
{
"epoch": 2.1951219512195124,
"grad_norm": 0.6506505238615833,
"learning_rate": 3.489544365487564e-05,
"loss": 0.1904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06596972048282623,
"step": 90,
"valid_targets_mean": 2361.2,
"valid_targets_min": 421
},
{
"epoch": 2.317073170731707,
"grad_norm": 0.5666611455881608,
"learning_rate": 3.4055771662900637e-05,
"loss": 0.1738,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.032204728573560715,
"step": 95,
"valid_targets_mean": 1985.5,
"valid_targets_min": 813
},
{
"epoch": 2.4390243902439024,
"grad_norm": 0.5528709212555227,
"learning_rate": 3.316401368641496e-05,
"loss": 0.1743,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03457804396748543,
"step": 100,
"valid_targets_mean": 3545.0,
"valid_targets_min": 423
},
{
"epoch": 2.5609756097560976,
"grad_norm": 0.8867699611106878,
"learning_rate": 3.222347428195699e-05,
"loss": 0.182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06031285971403122,
"step": 105,
"valid_targets_mean": 1640.8,
"valid_targets_min": 938
},
{
"epoch": 2.682926829268293,
"grad_norm": 0.5690766882507081,
"learning_rate": 3.1237638773707214e-05,
"loss": 0.1587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04014682024717331,
"step": 110,
"valid_targets_mean": 2029.8,
"valid_targets_min": 960
},
{
"epoch": 2.8048780487804876,
"grad_norm": 0.7008728096682685,
"learning_rate": 3.0210160338043583e-05,
"loss": 0.1708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07026761770248413,
"step": 115,
"valid_targets_mean": 2580.8,
"valid_targets_min": 786
},
{
"epoch": 2.926829268292683,
"grad_norm": 0.7716450116227951,
"learning_rate": 2.9144846466092773e-05,
"loss": 0.1624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06721051037311554,
"step": 120,
"valid_targets_mean": 2570.2,
"valid_targets_min": 1543
},
{
"epoch": 3.048780487804878,
"grad_norm": 1.5503052769053867,
"learning_rate": 2.804564485444265e-05,
"loss": 0.1503,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0261751189827919,
"step": 125,
"valid_targets_mean": 1452.5,
"valid_targets_min": 917
},
{
"epoch": 3.1707317073170733,
"grad_norm": 0.6734417093831508,
"learning_rate": 2.691662877630023e-05,
"loss": 0.1451,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.028553606942296028,
"step": 130,
"valid_targets_mean": 2922.2,
"valid_targets_min": 1260
},
{
"epoch": 3.292682926829268,
"grad_norm": 1.456490794322615,
"learning_rate": 2.5761981987304757e-05,
"loss": 0.1455,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03396756574511528,
"step": 135,
"valid_targets_mean": 1340.2,
"valid_targets_min": 796
},
{
"epoch": 3.4146341463414633,
"grad_norm": 0.9411604050978699,
"learning_rate": 2.4585983221929803e-05,
"loss": 0.1411,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.037783801555633545,
"step": 140,
"valid_targets_mean": 2368.0,
"valid_targets_min": 797
},
{
"epoch": 3.5365853658536586,
"grad_norm": 0.9034332983731574,
"learning_rate": 2.3392990337925696e-05,
"loss": 0.1504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04916565865278244,
"step": 145,
"valid_targets_mean": 1793.0,
"valid_targets_min": 648
},
{
"epoch": 3.658536585365854,
"grad_norm": 0.9084489941265945,
"learning_rate": 2.2187424167557496e-05,
"loss": 0.1577,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04747513309121132,
"step": 150,
"valid_targets_mean": 2595.5,
"valid_targets_min": 870
},
{
"epoch": 3.7804878048780486,
"grad_norm": 0.8065019341177397,
"learning_rate": 2.0973752135480505e-05,
"loss": 0.1459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.030843544751405716,
"step": 155,
"valid_targets_mean": 1864.2,
"valid_targets_min": 1047
},
{
"epoch": 3.902439024390244,
"grad_norm": 0.5952164537741085,
"learning_rate": 1.9756471703960053e-05,
"loss": 0.1474,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.020152652636170387,
"step": 160,
"valid_targets_mean": 1157.0,
"valid_targets_min": 537
},
{
"epoch": 4.024390243902439,
"grad_norm": 0.5913764265658609,
"learning_rate": 1.8540093706781848e-05,
"loss": 0.1105,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.023658908903598785,
"step": 165,
"valid_targets_mean": 1657.0,
"valid_targets_min": 1129
},
{
"epoch": 4.146341463414634,
"grad_norm": 0.8822354625847237,
"learning_rate": 1.7329125633612044e-05,
"loss": 0.1177,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03471684455871582,
"step": 170,
"valid_targets_mean": 1434.2,
"valid_targets_min": 1080
},
{
"epoch": 4.2682926829268295,
"grad_norm": 1.1958146779982144,
"learning_rate": 1.6128054926749403e-05,
"loss": 0.1245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03604736179113388,
"step": 175,
"valid_targets_mean": 2452.0,
"valid_targets_min": 883
},
{
"epoch": 4.390243902439025,
"grad_norm": 0.660365938604491,
"learning_rate": 1.4941332352166385e-05,
"loss": 0.1253,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022628050297498703,
"step": 180,
"valid_targets_mean": 2193.5,
"valid_targets_min": 832
},
{
"epoch": 4.512195121951219,
"grad_norm": 0.6469748368984156,
"learning_rate": 1.3773355506460369e-05,
"loss": 0.1212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.04104577377438545,
"step": 185,
"valid_targets_mean": 2404.2,
"valid_targets_min": 491
},
{
"epoch": 4.634146341463414,
"grad_norm": 0.5688289937825557,
"learning_rate": 1.2628452520832766e-05,
"loss": 0.1289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026066523045301437,
"step": 190,
"valid_targets_mean": 1617.0,
"valid_targets_min": 648
},
{
"epoch": 4.7560975609756095,
"grad_norm": 0.8846513600102238,
"learning_rate": 1.1510866022483702e-05,
"loss": 0.112,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03839657083153725,
"step": 195,
"valid_targets_mean": 2619.5,
"valid_targets_min": 832
},
{
"epoch": 4.878048780487805,
"grad_norm": 0.8205515547497473,
"learning_rate": 1.0424737412855825e-05,
"loss": 0.1243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.05243876576423645,
"step": 200,
"valid_targets_mean": 1985.8,
"valid_targets_min": 1428
},
{
"epoch": 5.0,
"grad_norm": 0.8961051415857917,
"learning_rate": 9.374091520986936e-06,
"loss": 0.144,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026954276487231255,
"step": 205,
"valid_targets_mean": 2568.2,
"valid_targets_min": 795
},
{
"epoch": 5.121951219512195,
"grad_norm": 0.6162209862161441,
"learning_rate": 8.362821688840947e-06,
"loss": 0.1142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.024338502436876297,
"step": 210,
"valid_targets_mean": 1392.8,
"valid_targets_min": 1031
},
{
"epoch": 5.2439024390243905,
"grad_norm": 0.8925372695642547,
"learning_rate": 7.394675343885827e-06,
"loss": 0.1041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03295106813311577,
"step": 215,
"valid_targets_mean": 3177.8,
"valid_targets_min": 1031
},
{
"epoch": 5.365853658536586,
"grad_norm": 0.6073755624637226,
"learning_rate": 6.473240112381944e-06,
"loss": 0.1122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02173219993710518,
"step": 220,
"valid_targets_mean": 3121.0,
"valid_targets_min": 1748
},
{
"epoch": 5.487804878048781,
"grad_norm": 1.040705900656647,
"learning_rate": 5.601930524840087e-06,
"loss": 0.1248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025791862979531288,
"step": 225,
"valid_targets_mean": 2800.5,
"valid_targets_min": 929
},
{
"epoch": 5.609756097560975,
"grad_norm": 0.8950373265293078,
"learning_rate": 4.7839753629144395e-06,
"loss": 0.1106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.014685861766338348,
"step": 230,
"valid_targets_mean": 945.0,
"valid_targets_min": 421
},
{
"epoch": 5.7317073170731705,
"grad_norm": 0.7649064738807579,
"learning_rate": 4.022405694618659e-06,
"loss": 0.108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.02761884592473507,
"step": 235,
"valid_targets_mean": 1485.0,
"valid_targets_min": 331
},
{
"epoch": 5.853658536585366,
"grad_norm": 0.8719679552921337,
"learning_rate": 3.320043642202444e-06,
"loss": 0.1033,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03598159924149513,
"step": 240,
"valid_targets_mean": 3070.0,
"valid_targets_min": 1260
},
{
"epoch": 5.975609756097561,
"grad_norm": 0.6242438144503197,
"learning_rate": 2.679491924311226e-06,
"loss": 0.1044,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.025556661188602448,
"step": 245,
"valid_targets_mean": 2245.0,
"valid_targets_min": 1069
},
{
"epoch": 6.097560975609756,
"grad_norm": 0.8732700455256523,
"learning_rate": 2.103124211182164e-06,
"loss": 0.1087,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03347954526543617,
"step": 250,
"valid_targets_mean": 1517.0,
"valid_targets_min": 606
},
{
"epoch": 6.219512195121951,
"grad_norm": 1.0111215639307505,
"learning_rate": 1.5930763286168138e-06,
"loss": 0.1049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.026082392781972885,
"step": 255,
"valid_targets_mean": 1642.8,
"valid_targets_min": 960
},
{
"epoch": 6.341463414634147,
"grad_norm": 0.7577281317860388,
"learning_rate": 1.1512383433257112e-06,
"loss": 0.0969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.01752658188343048,
"step": 260,
"valid_targets_mean": 2109.5,
"valid_targets_min": 999
},
{
"epoch": 6.463414634146342,
"grad_norm": 0.6730869279262718,
"learning_rate": 7.792475589738679e-07,
"loss": 0.1071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.020278798416256905,
"step": 265,
"valid_targets_mean": 1482.5,
"valid_targets_min": 859
},
{
"epoch": 6.585365853658536,
"grad_norm": 0.7854617549186186,
"learning_rate": 4.784824488814588e-07,
"loss": 0.1053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022827567532658577,
"step": 270,
"valid_targets_mean": 2941.8,
"valid_targets_min": 695
},
{
"epoch": 6.7073170731707314,
"grad_norm": 0.5254164831244615,
"learning_rate": 2.5005754786317173e-07,
"loss": 0.1065,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.022403722628951073,
"step": 275,
"valid_targets_mean": 2718.0,
"valid_targets_min": 1325
},
{
"epoch": 6.829268292682927,
"grad_norm": 0.6678257554250038,
"learning_rate": 9.481932213528444e-08,
"loss": 0.0987,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.027537822723388672,
"step": 280,
"valid_targets_mean": 3525.0,
"valid_targets_min": 1519
},
{
"epoch": 6.951219512195122,
"grad_norm": 0.8096613625255807,
"learning_rate": 1.334303259521219e-08,
"loss": 0.1098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.03351534903049469,
"step": 285,
"valid_targets_mean": 1808.5,
"valid_targets_min": 1244
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.016517292708158493,
"step": 287,
"total_flos": 9.154523001624986e+16,
"train_loss": 0.17878121568558522,
"train_runtime": 15474.591,
"train_samples_per_second": 0.297,
"train_steps_per_second": 0.019,
"valid_targets_mean": 880.2,
"valid_targets_min": 562
}
],
"logging_steps": 5,
"max_steps": 287,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.154523001624986e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}