Files
ModelHub XC 6b43b29ab1 初始化项目,由ModelHub XC社区提供模型
Model: anjajar/adult_goldfish_rus_large
Source: Original Platform
2026-06-05 14:55:20 +08:00

714 lines
17 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 98.9795918367347,
"eval_steps": 500,
"global_step": 29100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0204081632653061,
"grad_norm": 1.544520378112793,
"learning_rate": 1.0170068027210885e-05,
"loss": 9.6915,
"step": 300
},
{
"epoch": 2.0408163265306123,
"grad_norm": 1.6265058517456055,
"learning_rate": 2.0374149659863947e-05,
"loss": 8.3327,
"step": 600
},
{
"epoch": 3.061224489795918,
"grad_norm": 2.0470693111419678,
"learning_rate": 3.0578231292517004e-05,
"loss": 7.7972,
"step": 900
},
{
"epoch": 4.081632653061225,
"grad_norm": 1.7373660802841187,
"learning_rate": 4.078231292517007e-05,
"loss": 7.508,
"step": 1200
},
{
"epoch": 5.1020408163265305,
"grad_norm": 2.189188241958618,
"learning_rate": 5.0986394557823136e-05,
"loss": 7.2126,
"step": 1500
},
{
"epoch": 6.122448979591836,
"grad_norm": 2.1524977684020996,
"learning_rate": 6.11904761904762e-05,
"loss": 6.9194,
"step": 1800
},
{
"epoch": 7.142857142857143,
"grad_norm": 2.03401517868042,
"learning_rate": 7.139455782312926e-05,
"loss": 6.6392,
"step": 2100
},
{
"epoch": 8.16326530612245,
"grad_norm": 2.0746707916259766,
"learning_rate": 8.159863945578233e-05,
"loss": 6.3837,
"step": 2400
},
{
"epoch": 9.183673469387756,
"grad_norm": 2.1161186695098877,
"learning_rate": 9.180272108843538e-05,
"loss": 6.1311,
"step": 2700
},
{
"epoch": 10.204081632653061,
"grad_norm": 2.1764469146728516,
"learning_rate": 9.977702191987907e-05,
"loss": 5.8614,
"step": 3000
},
{
"epoch": 11.224489795918368,
"grad_norm": 2.414827823638916,
"learning_rate": 9.86432350718065e-05,
"loss": 5.5985,
"step": 3300
},
{
"epoch": 12.244897959183673,
"grad_norm": 2.5662031173706055,
"learning_rate": 9.750944822373394e-05,
"loss": 5.3029,
"step": 3600
},
{
"epoch": 13.26530612244898,
"grad_norm": 2.821119785308838,
"learning_rate": 9.637566137566139e-05,
"loss": 5.0116,
"step": 3900
},
{
"epoch": 14.285714285714286,
"grad_norm": 3.0048000812530518,
"learning_rate": 9.524187452758882e-05,
"loss": 4.7344,
"step": 4200
},
{
"epoch": 15.306122448979592,
"grad_norm": 3.242598295211792,
"learning_rate": 9.410808767951625e-05,
"loss": 4.4621,
"step": 4500
},
{
"epoch": 16.3265306122449,
"grad_norm": 3.316965103149414,
"learning_rate": 9.29743008314437e-05,
"loss": 4.2054,
"step": 4800
},
{
"epoch": 17.346938775510203,
"grad_norm": 3.7419989109039307,
"learning_rate": 9.184051398337114e-05,
"loss": 3.9517,
"step": 5100
},
{
"epoch": 18.367346938775512,
"grad_norm": 3.7479987144470215,
"learning_rate": 9.070672713529857e-05,
"loss": 3.7011,
"step": 5400
},
{
"epoch": 19.387755102040817,
"grad_norm": 3.7604148387908936,
"learning_rate": 8.9572940287226e-05,
"loss": 3.4686,
"step": 5700
},
{
"epoch": 20.408163265306122,
"grad_norm": 4.052367210388184,
"learning_rate": 8.843915343915344e-05,
"loss": 3.2285,
"step": 6000
},
{
"epoch": 21.428571428571427,
"grad_norm": 4.340782642364502,
"learning_rate": 8.730536659108089e-05,
"loss": 3.0094,
"step": 6300
},
{
"epoch": 22.448979591836736,
"grad_norm": 4.590024948120117,
"learning_rate": 8.617157974300832e-05,
"loss": 2.7901,
"step": 6600
},
{
"epoch": 23.46938775510204,
"grad_norm": 4.343008995056152,
"learning_rate": 8.503779289493575e-05,
"loss": 2.5887,
"step": 6900
},
{
"epoch": 24.489795918367346,
"grad_norm": 4.200931549072266,
"learning_rate": 8.39040060468632e-05,
"loss": 2.3804,
"step": 7200
},
{
"epoch": 25.510204081632654,
"grad_norm": 4.765750408172607,
"learning_rate": 8.277021919879064e-05,
"loss": 2.1853,
"step": 7500
},
{
"epoch": 26.53061224489796,
"grad_norm": 4.196296215057373,
"learning_rate": 8.163643235071807e-05,
"loss": 2.0026,
"step": 7800
},
{
"epoch": 27.551020408163264,
"grad_norm": 4.485163688659668,
"learning_rate": 8.05026455026455e-05,
"loss": 1.8218,
"step": 8100
},
{
"epoch": 28.571428571428573,
"grad_norm": 4.515989780426025,
"learning_rate": 7.936885865457294e-05,
"loss": 1.6588,
"step": 8400
},
{
"epoch": 29.591836734693878,
"grad_norm": 4.492111682891846,
"learning_rate": 7.823507180650039e-05,
"loss": 1.4945,
"step": 8700
},
{
"epoch": 30.612244897959183,
"grad_norm": 4.51740026473999,
"learning_rate": 7.710128495842782e-05,
"loss": 1.3481,
"step": 9000
},
{
"epoch": 31.632653061224488,
"grad_norm": 4.193362236022949,
"learning_rate": 7.596749811035526e-05,
"loss": 1.2062,
"step": 9300
},
{
"epoch": 32.6530612244898,
"grad_norm": 4.017958164215088,
"learning_rate": 7.483371126228269e-05,
"loss": 1.0725,
"step": 9600
},
{
"epoch": 33.673469387755105,
"grad_norm": 4.180546283721924,
"learning_rate": 7.369992441421014e-05,
"loss": 0.9466,
"step": 9900
},
{
"epoch": 34.69387755102041,
"grad_norm": 4.280745983123779,
"learning_rate": 7.256613756613757e-05,
"loss": 0.8416,
"step": 10200
},
{
"epoch": 35.714285714285715,
"grad_norm": 3.9538300037384033,
"learning_rate": 7.143235071806501e-05,
"loss": 0.7401,
"step": 10500
},
{
"epoch": 36.734693877551024,
"grad_norm": 4.204588890075684,
"learning_rate": 7.029856386999244e-05,
"loss": 0.6532,
"step": 10800
},
{
"epoch": 37.755102040816325,
"grad_norm": 3.8845582008361816,
"learning_rate": 6.916477702191987e-05,
"loss": 0.5701,
"step": 11100
},
{
"epoch": 38.775510204081634,
"grad_norm": 3.7283339500427246,
"learning_rate": 6.803099017384732e-05,
"loss": 0.5032,
"step": 11400
},
{
"epoch": 39.795918367346935,
"grad_norm": 3.3194797039031982,
"learning_rate": 6.689720332577476e-05,
"loss": 0.4403,
"step": 11700
},
{
"epoch": 40.816326530612244,
"grad_norm": 3.4429259300231934,
"learning_rate": 6.57634164777022e-05,
"loss": 0.3887,
"step": 12000
},
{
"epoch": 41.83673469387755,
"grad_norm": 3.080552577972412,
"learning_rate": 6.462962962962962e-05,
"loss": 0.3442,
"step": 12300
},
{
"epoch": 42.857142857142854,
"grad_norm": 3.2737112045288086,
"learning_rate": 6.349584278155707e-05,
"loss": 0.3071,
"step": 12600
},
{
"epoch": 43.87755102040816,
"grad_norm": 2.8895883560180664,
"learning_rate": 6.236205593348451e-05,
"loss": 0.275,
"step": 12900
},
{
"epoch": 44.89795918367347,
"grad_norm": 3.075352430343628,
"learning_rate": 6.122826908541194e-05,
"loss": 0.2475,
"step": 13200
},
{
"epoch": 45.91836734693877,
"grad_norm": 2.932194471359253,
"learning_rate": 6.009448223733938e-05,
"loss": 0.2239,
"step": 13500
},
{
"epoch": 46.93877551020408,
"grad_norm": 2.5952064990997314,
"learning_rate": 5.896069538926682e-05,
"loss": 0.2045,
"step": 13800
},
{
"epoch": 47.95918367346939,
"grad_norm": 2.456416606903076,
"learning_rate": 5.7826908541194255e-05,
"loss": 0.1875,
"step": 14100
},
{
"epoch": 48.97959183673469,
"grad_norm": 2.836243152618408,
"learning_rate": 5.66931216931217e-05,
"loss": 0.1717,
"step": 14400
},
{
"epoch": 50.0,
"grad_norm": 2.4769959449768066,
"learning_rate": 5.5559334845049137e-05,
"loss": 0.1582,
"step": 14700
},
{
"epoch": 51.02040816326531,
"grad_norm": 1.9502800703048706,
"learning_rate": 5.442554799697657e-05,
"loss": 0.1468,
"step": 15000
},
{
"epoch": 52.04081632653061,
"grad_norm": 2.145501136779785,
"learning_rate": 5.3291761148904005e-05,
"loss": 0.1366,
"step": 15300
},
{
"epoch": 53.06122448979592,
"grad_norm": 1.8530632257461548,
"learning_rate": 5.215797430083145e-05,
"loss": 0.1266,
"step": 15600
},
{
"epoch": 54.08163265306123,
"grad_norm": 2.0811140537261963,
"learning_rate": 5.1024187452758886e-05,
"loss": 0.1179,
"step": 15900
},
{
"epoch": 55.10204081632653,
"grad_norm": 1.8534563779830933,
"learning_rate": 4.9890400604686324e-05,
"loss": 0.1096,
"step": 16200
},
{
"epoch": 56.12244897959184,
"grad_norm": 1.7965441942214966,
"learning_rate": 4.875661375661376e-05,
"loss": 0.1032,
"step": 16500
},
{
"epoch": 57.142857142857146,
"grad_norm": 1.824494481086731,
"learning_rate": 4.76228269085412e-05,
"loss": 0.0967,
"step": 16800
},
{
"epoch": 58.16326530612245,
"grad_norm": 1.6980013847351074,
"learning_rate": 4.6489040060468636e-05,
"loss": 0.0907,
"step": 17100
},
{
"epoch": 59.183673469387756,
"grad_norm": 1.6149917840957642,
"learning_rate": 4.5355253212396074e-05,
"loss": 0.085,
"step": 17400
},
{
"epoch": 60.204081632653065,
"grad_norm": 1.788779854774475,
"learning_rate": 4.422146636432351e-05,
"loss": 0.0804,
"step": 17700
},
{
"epoch": 61.224489795918366,
"grad_norm": 1.7672044038772583,
"learning_rate": 4.308767951625094e-05,
"loss": 0.0759,
"step": 18000
},
{
"epoch": 62.244897959183675,
"grad_norm": 1.5566641092300415,
"learning_rate": 4.1953892668178386e-05,
"loss": 0.0722,
"step": 18300
},
{
"epoch": 63.265306122448976,
"grad_norm": 1.291110873222351,
"learning_rate": 4.082010582010582e-05,
"loss": 0.0678,
"step": 18600
},
{
"epoch": 64.28571428571429,
"grad_norm": 1.596009373664856,
"learning_rate": 3.968631897203326e-05,
"loss": 0.0639,
"step": 18900
},
{
"epoch": 65.3061224489796,
"grad_norm": 1.4961538314819336,
"learning_rate": 3.85525321239607e-05,
"loss": 0.0605,
"step": 19200
},
{
"epoch": 66.3265306122449,
"grad_norm": 1.383008599281311,
"learning_rate": 3.7418745275888136e-05,
"loss": 0.0571,
"step": 19500
},
{
"epoch": 67.34693877551021,
"grad_norm": 1.1882243156433105,
"learning_rate": 3.628495842781557e-05,
"loss": 0.0541,
"step": 19800
},
{
"epoch": 68.36734693877551,
"grad_norm": 1.4175117015838623,
"learning_rate": 3.515117157974301e-05,
"loss": 0.0515,
"step": 20100
},
{
"epoch": 69.38775510204081,
"grad_norm": 1.412561058998108,
"learning_rate": 3.401738473167045e-05,
"loss": 0.0492,
"step": 20400
},
{
"epoch": 70.40816326530613,
"grad_norm": 1.358535885810852,
"learning_rate": 3.2883597883597886e-05,
"loss": 0.0462,
"step": 20700
},
{
"epoch": 71.42857142857143,
"grad_norm": 1.338392972946167,
"learning_rate": 3.174981103552532e-05,
"loss": 0.0443,
"step": 21000
},
{
"epoch": 72.44897959183673,
"grad_norm": 1.3225773572921753,
"learning_rate": 3.061602418745276e-05,
"loss": 0.0419,
"step": 21300
},
{
"epoch": 73.46938775510205,
"grad_norm": 1.1727213859558105,
"learning_rate": 2.9482237339380198e-05,
"loss": 0.0394,
"step": 21600
},
{
"epoch": 74.48979591836735,
"grad_norm": 1.25161612033844,
"learning_rate": 2.834845049130764e-05,
"loss": 0.0375,
"step": 21900
},
{
"epoch": 75.51020408163265,
"grad_norm": 1.240116834640503,
"learning_rate": 2.7214663643235073e-05,
"loss": 0.0358,
"step": 22200
},
{
"epoch": 76.53061224489795,
"grad_norm": 0.9712527394294739,
"learning_rate": 2.6080876795162514e-05,
"loss": 0.0339,
"step": 22500
},
{
"epoch": 77.55102040816327,
"grad_norm": 1.147048830986023,
"learning_rate": 2.4947089947089948e-05,
"loss": 0.0323,
"step": 22800
},
{
"epoch": 78.57142857142857,
"grad_norm": 1.0916506052017212,
"learning_rate": 2.3813303099017385e-05,
"loss": 0.0308,
"step": 23100
},
{
"epoch": 79.59183673469387,
"grad_norm": 1.128098964691162,
"learning_rate": 2.2679516250944823e-05,
"loss": 0.0294,
"step": 23400
},
{
"epoch": 80.61224489795919,
"grad_norm": 1.0495482683181763,
"learning_rate": 2.154572940287226e-05,
"loss": 0.0276,
"step": 23700
},
{
"epoch": 81.63265306122449,
"grad_norm": 0.8648446798324585,
"learning_rate": 2.0411942554799698e-05,
"loss": 0.0261,
"step": 24000
},
{
"epoch": 82.65306122448979,
"grad_norm": 1.1346194744110107,
"learning_rate": 1.9278155706727135e-05,
"loss": 0.0245,
"step": 24300
},
{
"epoch": 83.6734693877551,
"grad_norm": 0.9076672196388245,
"learning_rate": 1.8144368858654572e-05,
"loss": 0.0237,
"step": 24600
},
{
"epoch": 84.6938775510204,
"grad_norm": 1.0035544633865356,
"learning_rate": 1.701058201058201e-05,
"loss": 0.0222,
"step": 24900
},
{
"epoch": 85.71428571428571,
"grad_norm": 0.769279956817627,
"learning_rate": 1.587679516250945e-05,
"loss": 0.0211,
"step": 25200
},
{
"epoch": 86.73469387755102,
"grad_norm": 0.9665892124176025,
"learning_rate": 1.4743008314436888e-05,
"loss": 0.0202,
"step": 25500
},
{
"epoch": 87.75510204081633,
"grad_norm": 0.9066174626350403,
"learning_rate": 1.3609221466364324e-05,
"loss": 0.0193,
"step": 25800
},
{
"epoch": 88.77551020408163,
"grad_norm": 0.9459673166275024,
"learning_rate": 1.2475434618291761e-05,
"loss": 0.0183,
"step": 26100
},
{
"epoch": 89.79591836734694,
"grad_norm": 0.8062217235565186,
"learning_rate": 1.1341647770219199e-05,
"loss": 0.0174,
"step": 26400
},
{
"epoch": 90.81632653061224,
"grad_norm": 0.8470116853713989,
"learning_rate": 1.0207860922146636e-05,
"loss": 0.0167,
"step": 26700
},
{
"epoch": 91.83673469387755,
"grad_norm": 0.7526578903198242,
"learning_rate": 9.074074074074075e-06,
"loss": 0.016,
"step": 27000
},
{
"epoch": 92.85714285714286,
"grad_norm": 0.6859294176101685,
"learning_rate": 7.940287226001513e-06,
"loss": 0.0154,
"step": 27300
},
{
"epoch": 93.87755102040816,
"grad_norm": 0.574286937713623,
"learning_rate": 6.8065003779289495e-06,
"loss": 0.0147,
"step": 27600
},
{
"epoch": 94.89795918367346,
"grad_norm": 0.9053287506103516,
"learning_rate": 5.672713529856388e-06,
"loss": 0.0143,
"step": 27900
},
{
"epoch": 95.91836734693878,
"grad_norm": 0.5810430645942688,
"learning_rate": 4.538926681783825e-06,
"loss": 0.0137,
"step": 28200
},
{
"epoch": 96.93877551020408,
"grad_norm": 0.5778042674064636,
"learning_rate": 3.4051398337112627e-06,
"loss": 0.0132,
"step": 28500
},
{
"epoch": 97.95918367346938,
"grad_norm": 0.5646163821220398,
"learning_rate": 2.2713529856387e-06,
"loss": 0.0129,
"step": 28800
},
{
"epoch": 98.9795918367347,
"grad_norm": 0.6283496022224426,
"learning_rate": 1.1375661375661376e-06,
"loss": 0.0126,
"step": 29100
}
],
"logging_steps": 300,
"max_steps": 29400,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.08287850496e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}