Files
daft-qwen2.5-coder-3b-instr…/trainer_state.json
ModelHub XC 3e3401c6eb 初始化项目,由ModelHub XC社区提供模型
Model: aasim-m/daft-qwen2.5-coder-3b-instruct-full
Source: Original Platform
2026-04-28 09:50:39 +08:00

310 lines
7.9 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 387,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07759456838021339,
"grad_norm": 1.9683642394428182,
"learning_rate": 2.307692307692308e-06,
"loss": 0.7343237400054932,
"step": 10
},
{
"epoch": 0.15518913676042678,
"grad_norm": 1.4175428237350762,
"learning_rate": 4.871794871794872e-06,
"loss": 0.5461452007293701,
"step": 20
},
{
"epoch": 0.23278370514064015,
"grad_norm": 0.5442834252561063,
"learning_rate": 7.435897435897437e-06,
"loss": 0.3490773677825928,
"step": 30
},
{
"epoch": 0.31037827352085356,
"grad_norm": 0.32322946422972365,
"learning_rate": 1e-05,
"loss": 0.2592954635620117,
"step": 40
},
{
"epoch": 0.3879728419010669,
"grad_norm": 0.24901563193155196,
"learning_rate": 9.979639600327522e-06,
"loss": 0.2136533737182617,
"step": 50
},
{
"epoch": 0.4655674102812803,
"grad_norm": 0.2047675084448879,
"learning_rate": 9.918724219660013e-06,
"loss": 0.18301695585250854,
"step": 60
},
{
"epoch": 0.5431619786614937,
"grad_norm": 0.1694310997257767,
"learning_rate": 9.817749962596115e-06,
"loss": 0.16246029138565063,
"step": 70
},
{
"epoch": 0.6207565470417071,
"grad_norm": 0.22587456656054467,
"learning_rate": 9.677539179628005e-06,
"loss": 0.14934264421463012,
"step": 80
},
{
"epoch": 0.6983511154219205,
"grad_norm": 0.22154973989105028,
"learning_rate": 9.499233769787534e-06,
"loss": 0.134801185131073,
"step": 90
},
{
"epoch": 0.7759456838021338,
"grad_norm": 0.2099862635469814,
"learning_rate": 9.284285880837947e-06,
"loss": 0.13017673492431642,
"step": 100
},
{
"epoch": 0.8535402521823472,
"grad_norm": 0.32230657820182124,
"learning_rate": 9.034446082750352e-06,
"loss": 0.12214579582214355,
"step": 110
},
{
"epoch": 0.9311348205625606,
"grad_norm": 0.324253054340729,
"learning_rate": 8.751749110782013e-06,
"loss": 0.12026152610778809,
"step": 120
},
{
"epoch": 1.0077594568380213,
"grad_norm": 0.20488241588612174,
"learning_rate": 8.438497294267117e-06,
"loss": 0.11126101016998291,
"step": 130
},
{
"epoch": 1.0853540252182348,
"grad_norm": 0.20661218086124847,
"learning_rate": 8.097241806078616e-06,
"loss": 0.10776399374008179,
"step": 140
},
{
"epoch": 1.162948593598448,
"grad_norm": 0.25468202960165104,
"learning_rate": 7.730761885468486e-06,
"loss": 0.10431833267211914,
"step": 150
},
{
"epoch": 1.2405431619786615,
"grad_norm": 0.17930064486716413,
"learning_rate": 7.342042203498952e-06,
"loss": 0.10304663181304932,
"step": 160
},
{
"epoch": 1.3181377303588748,
"grad_norm": 0.20225538073749422,
"learning_rate": 6.934248555404197e-06,
"loss": 0.09784629344940185,
"step": 170
},
{
"epoch": 1.3957322987390883,
"grad_norm": 0.2256721972453044,
"learning_rate": 6.510702077847864e-06,
"loss": 0.09537227749824524,
"step": 180
},
{
"epoch": 1.4733268671193016,
"grad_norm": 0.21487787771920072,
"learning_rate": 6.074852201055121e-06,
"loss": 0.09520423412322998,
"step": 190
},
{
"epoch": 1.5509214354995149,
"grad_norm": 0.17540761321861204,
"learning_rate": 5.630248556101448e-06,
"loss": 0.09088362455368042,
"step": 200
},
{
"epoch": 1.6285160038797284,
"grad_norm": 0.21743503130668765,
"learning_rate": 5.180512066149682e-06,
"loss": 0.0899280071258545,
"step": 210
},
{
"epoch": 1.706110572259942,
"grad_norm": 0.20331687416060285,
"learning_rate": 4.729305457072913e-06,
"loss": 0.0881616234779358,
"step": 220
},
{
"epoch": 1.7837051406401552,
"grad_norm": 0.15781467110120098,
"learning_rate": 4.280303427629404e-06,
"loss": 0.08638249635696411,
"step": 230
},
{
"epoch": 1.8612997090203685,
"grad_norm": 0.1623620489054104,
"learning_rate": 3.8371627221284495e-06,
"loss": 0.08716154098510742,
"step": 240
},
{
"epoch": 1.938894277400582,
"grad_norm": 0.15611783173066054,
"learning_rate": 3.403492349320101e-06,
"loss": 0.08580605983734131,
"step": 250
},
{
"epoch": 2.0155189136760425,
"grad_norm": 0.15287072067575233,
"learning_rate": 2.982824190050958e-06,
"loss": 0.08316840529441834,
"step": 260
},
{
"epoch": 2.093113482056256,
"grad_norm": 0.1853136112632167,
"learning_rate": 2.5785842330619038e-06,
"loss": 0.08091338872909545,
"step": 270
},
{
"epoch": 2.1707080504364695,
"grad_norm": 0.14114872525549504,
"learning_rate": 2.1940646731880887e-06,
"loss": 0.08085420131683349,
"step": 280
},
{
"epoch": 2.248302618816683,
"grad_norm": 0.13643528182686213,
"learning_rate": 1.8323970991978823e-06,
"loss": 0.08156624436378479,
"step": 290
},
{
"epoch": 2.325897187196896,
"grad_norm": 0.14573681730374075,
"learning_rate": 1.4965269896332884e-06,
"loss": 0.0808843195438385,
"step": 300
},
{
"epoch": 2.4034917555771096,
"grad_norm": 0.1466398992341211,
"learning_rate": 1.1891897243618184e-06,
"loss": 0.07979943156242371,
"step": 310
},
{
"epoch": 2.481086323957323,
"grad_norm": 0.12798260710398743,
"learning_rate": 9.128883072055411e-07,
"loss": 0.08049517869949341,
"step": 320
},
{
"epoch": 2.558680892337536,
"grad_norm": 0.13826353734235647,
"learning_rate": 6.698729810778065e-07,
"loss": 0.08011389374732972,
"step": 330
},
{
"epoch": 2.6362754607177497,
"grad_norm": 0.1305401343538733,
"learning_rate": 4.6212290164521554e-07,
"loss": 0.08163015246391296,
"step": 340
},
{
"epoch": 2.713870029097963,
"grad_norm": 0.12804004522045906,
"learning_rate": 2.9133001876746004e-07,
"loss": 0.08051948547363282,
"step": 350
},
{
"epoch": 2.7914645974781767,
"grad_norm": 0.12808224007612634,
"learning_rate": 1.5888529698718347e-07,
"loss": 0.07719261646270752,
"step": 360
},
{
"epoch": 2.86905916585839,
"grad_norm": 0.12117673381149041,
"learning_rate": 6.58673872923693e-08,
"loss": 0.08128957152366638,
"step": 370
},
{
"epoch": 2.946653734238603,
"grad_norm": 0.124324493318766,
"learning_rate": 1.3033842410251074e-08,
"loss": 0.07743191719055176,
"step": 380
},
{
"epoch": 3.0,
"step": 387,
"total_flos": 3081875480379392.0,
"train_loss": 0.06056562058377327,
"train_runtime": 29609.547,
"train_samples_per_second": 6.685,
"train_steps_per_second": 0.013
}
],
"logging_steps": 10,
"max_steps": 387,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 40,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3081875480379392.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}