Files
ModelHub XC 739a4afc6f 初始化项目,由ModelHub XC社区提供模型
Model: israel/AfriqueQwen-14B-multiturn_1
Source: Original Platform
2026-05-13 06:26:36 +08:00

273 lines
6.9 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 34265,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1459321415541773,
"grad_norm": 3.8735740724697223,
"learning_rate": 2.915086081120514e-06,
"loss": 1.154789306640625,
"step": 1000
},
{
"epoch": 0.2918642831083546,
"grad_norm": 3.513073777315236,
"learning_rate": 5.833090166326233e-06,
"loss": 0.9386199340820313,
"step": 2000
},
{
"epoch": 0.43779642466253194,
"grad_norm": 2.667804789171682,
"learning_rate": 8.751094251531953e-06,
"loss": 0.8885218505859375,
"step": 3000
},
{
"epoch": 0.5837285662167092,
"grad_norm": 1.8311641416860456,
"learning_rate": 9.991513345767592e-06,
"loss": 0.8694163208007812,
"step": 4000
},
{
"epoch": 0.7296607077708865,
"grad_norm": 2.250397237666049,
"learning_rate": 9.936020028278053e-06,
"loss": 0.85284765625,
"step": 5000
},
{
"epoch": 0.8755928493250639,
"grad_norm": 2.6756387164181197,
"learning_rate": 9.829343371836088e-06,
"loss": 0.821312744140625,
"step": 6000
},
{
"epoch": 1.021452024808464,
"grad_norm": 2.6729990910245593,
"learning_rate": 9.672589544454328e-06,
"loss": 0.7774617919921875,
"step": 7000
},
{
"epoch": 1.1673841663626414,
"grad_norm": 2.642277321184833,
"learning_rate": 9.46738398205746e-06,
"loss": 0.608907958984375,
"step": 8000
},
{
"epoch": 1.3133163079168186,
"grad_norm": 2.1544218289818913,
"learning_rate": 9.215854533761766e-06,
"loss": 0.6088528442382812,
"step": 9000
},
{
"epoch": 1.459248449470996,
"grad_norm": 2.3990917619771497,
"learning_rate": 8.920609397454381e-06,
"loss": 0.6051533203125,
"step": 10000
},
{
"epoch": 1.6051805910251733,
"grad_norm": 2.5614175915079387,
"learning_rate": 8.584710074466158e-06,
"loss": 0.6031138916015625,
"step": 11000
},
{
"epoch": 1.7511127325793505,
"grad_norm": 1.5472599757402412,
"learning_rate": 8.211639623780629e-06,
"loss": 0.6085886840820313,
"step": 12000
},
{
"epoch": 1.897044874133528,
"grad_norm": 2.384688518854998,
"learning_rate": 7.805266544962458e-06,
"loss": 0.6080531616210938,
"step": 13000
},
{
"epoch": 2.042904049616928,
"grad_norm": 2.0076063347622974,
"learning_rate": 7.3698046643160645e-06,
"loss": 0.520530029296875,
"step": 14000
},
{
"epoch": 2.1888361911711054,
"grad_norm": 1.529742164922155,
"learning_rate": 6.909769440229038e-06,
"loss": 0.33792144775390626,
"step": 15000
},
{
"epoch": 2.334768332725283,
"grad_norm": 2.116689548920815,
"learning_rate": 6.4299311407857035e-06,
"loss": 0.3347291259765625,
"step": 16000
},
{
"epoch": 2.48070047427946,
"grad_norm": 1.8084278631560584,
"learning_rate": 5.935265379168761e-06,
"loss": 0.3385657958984375,
"step": 17000
},
{
"epoch": 2.6266326158336373,
"grad_norm": 3.29945063597593,
"learning_rate": 5.430901519764892e-06,
"loss": 0.33349261474609376,
"step": 18000
},
{
"epoch": 2.7725647573878147,
"grad_norm": 1.9290178050582503,
"learning_rate": 4.9220694899697185e-06,
"loss": 0.3383995361328125,
"step": 19000
},
{
"epoch": 2.918496898941992,
"grad_norm": 1.8157318709036212,
"learning_rate": 4.414045549219315e-06,
"loss": 0.3316441650390625,
"step": 20000
},
{
"epoch": 3.064356074425392,
"grad_norm": 3.0277622115117175,
"learning_rate": 3.912097577588397e-06,
"loss": 0.2421361541748047,
"step": 21000
},
{
"epoch": 3.2102882159795696,
"grad_norm": 2.0816758739547914,
"learning_rate": 3.4214304512770823e-06,
"loss": 0.1313203887939453,
"step": 22000
},
{
"epoch": 3.356220357533747,
"grad_norm": 1.739691217957409,
"learning_rate": 2.9471320714071095e-06,
"loss": 0.12995486450195312,
"step": 23000
},
{
"epoch": 3.502152499087924,
"grad_norm": 3.1257187709887875,
"learning_rate": 2.4941206057740675e-06,
"loss": 0.1304095458984375,
"step": 24000
},
{
"epoch": 3.6480846406421015,
"grad_norm": 2.2140995652592785,
"learning_rate": 2.06709349062457e-06,
"loss": 0.12645170593261718,
"step": 25000
},
{
"epoch": 3.7940167821962785,
"grad_norm": 1.7680132762507759,
"learning_rate": 1.6704787212769829e-06,
"loss": 0.12362543487548829,
"step": 26000
},
{
"epoch": 3.939948923750456,
"grad_norm": 3.186667247027872,
"learning_rate": 1.3083889366705216e-06,
"loss": 0.11934644317626954,
"step": 27000
},
{
"epoch": 4.085808099233856,
"grad_norm": 0.9381402976600279,
"learning_rate": 9.845787739562829e-07,
"loss": 0.07250856018066407,
"step": 28000
},
{
"epoch": 4.231740240788033,
"grad_norm": 1.5081719675375433,
"learning_rate": 7.024059353355333e-07,
"loss": 0.03657876205444336,
"step": 29000
},
{
"epoch": 4.377672382342211,
"grad_norm": 1.3793894154463466,
"learning_rate": 4.64796370857008e-07,
"loss": 0.035053966522216796,
"step": 30000
},
{
"epoch": 4.523604523896388,
"grad_norm": 1.9377667427717677,
"learning_rate": 2.7421393820510846e-07,
"loss": 0.0349309196472168,
"step": 31000
},
{
"epoch": 4.669536665450566,
"grad_norm": 1.109997862535006,
"learning_rate": 1.326348540874095e-07,
"loss": 0.03447756195068359,
"step": 32000
},
{
"epoch": 4.815468807004743,
"grad_norm": 1.152236730081828,
"learning_rate": 4.152720214406214e-08,
"loss": 0.03444968414306641,
"step": 33000
},
{
"epoch": 4.96140094855892,
"grad_norm": 1.6457557752833996,
"learning_rate": 1.8357098688476238e-09,
"loss": 0.034835365295410155,
"step": 34000
}
],
"logging_steps": 1000,
"max_steps": 34265,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 481722301939712.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}