Files
appworld_distillation_sft_v…/trainer_state.json
ModelHub XC 934e39a31c 初始化项目,由ModelHub XC社区提供模型
Model: huseyinatahaninan/appworld_distillation_sft_v2-SFT-Qwen3-4B-Instruct-2507
Source: Original Platform
2026-06-08 17:41:29 +08:00

704 lines
16 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"eval_steps": 500,
"global_step": 60,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5,
"grad_norm": 38.81770706176758,
"learning_rate": 0.0,
"loss": 1.669,
"step": 1
},
{
"epoch": 1.0,
"grad_norm": 42.17606735229492,
"learning_rate": 8.333333333333333e-07,
"loss": 1.7313,
"step": 2
},
{
"epoch": 1.0,
"eval_loss": 1.7900009155273438,
"eval_runtime": 7.5875,
"eval_samples_per_second": 4.613,
"eval_steps_per_second": 0.659,
"step": 2
},
{
"epoch": 1.5,
"grad_norm": 40.34153366088867,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.662,
"step": 3
},
{
"epoch": 2.0,
"grad_norm": 37.466861724853516,
"learning_rate": 2.5e-06,
"loss": 1.6378,
"step": 4
},
{
"epoch": 2.0,
"eval_loss": 1.5364655256271362,
"eval_runtime": 7.4642,
"eval_samples_per_second": 4.689,
"eval_steps_per_second": 0.67,
"step": 4
},
{
"epoch": 2.5,
"grad_norm": 29.840675354003906,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.4225,
"step": 5
},
{
"epoch": 3.0,
"grad_norm": 13.304512977600098,
"learning_rate": 4.166666666666667e-06,
"loss": 1.1356,
"step": 6
},
{
"epoch": 3.0,
"eval_loss": 1.1185977458953857,
"eval_runtime": 7.4596,
"eval_samples_per_second": 4.692,
"eval_steps_per_second": 0.67,
"step": 6
},
{
"epoch": 3.5,
"grad_norm": 10.08704662322998,
"learning_rate": 5e-06,
"loss": 1.0871,
"step": 7
},
{
"epoch": 4.0,
"grad_norm": 3.6056466102600098,
"learning_rate": 4.995770395678171e-06,
"loss": 0.9041,
"step": 8
},
{
"epoch": 4.0,
"eval_loss": 0.9372425079345703,
"eval_runtime": 7.4702,
"eval_samples_per_second": 4.685,
"eval_steps_per_second": 0.669,
"step": 8
},
{
"epoch": 4.5,
"grad_norm": 3.1515934467315674,
"learning_rate": 4.983095894354858e-06,
"loss": 0.8773,
"step": 9
},
{
"epoch": 5.0,
"grad_norm": 2.810807704925537,
"learning_rate": 4.962019382530521e-06,
"loss": 0.8762,
"step": 10
},
{
"epoch": 5.0,
"eval_loss": 0.8529078364372253,
"eval_runtime": 7.4567,
"eval_samples_per_second": 4.694,
"eval_steps_per_second": 0.671,
"step": 10
},
{
"epoch": 5.5,
"grad_norm": 2.751431465148926,
"learning_rate": 4.93261217644956e-06,
"loss": 0.8024,
"step": 11
},
{
"epoch": 6.0,
"grad_norm": 3.107816219329834,
"learning_rate": 4.894973780788722e-06,
"loss": 0.7807,
"step": 12
},
{
"epoch": 6.0,
"eval_loss": 0.8198402523994446,
"eval_runtime": 7.4664,
"eval_samples_per_second": 4.688,
"eval_steps_per_second": 0.67,
"step": 12
},
{
"epoch": 6.5,
"grad_norm": 2.681008815765381,
"learning_rate": 4.849231551964771e-06,
"loss": 0.758,
"step": 13
},
{
"epoch": 7.0,
"grad_norm": 2.2834079265594482,
"learning_rate": 4.7955402672006855e-06,
"loss": 0.7323,
"step": 14
},
{
"epoch": 7.0,
"eval_loss": 0.7645982503890991,
"eval_runtime": 7.482,
"eval_samples_per_second": 4.678,
"eval_steps_per_second": 0.668,
"step": 14
},
{
"epoch": 7.5,
"grad_norm": 1.8697445392608643,
"learning_rate": 4.734081600808531e-06,
"loss": 0.7175,
"step": 15
},
{
"epoch": 8.0,
"grad_norm": 1.4500072002410889,
"learning_rate": 4.665063509461098e-06,
"loss": 0.6814,
"step": 16
},
{
"epoch": 8.0,
"eval_loss": 0.7229499816894531,
"eval_runtime": 7.4866,
"eval_samples_per_second": 4.675,
"eval_steps_per_second": 0.668,
"step": 16
},
{
"epoch": 8.5,
"grad_norm": 1.2065025568008423,
"learning_rate": 4.588719528532342e-06,
"loss": 0.6536,
"step": 17
},
{
"epoch": 9.0,
"grad_norm": 1.2752524614334106,
"learning_rate": 4.50530798188761e-06,
"loss": 0.6211,
"step": 18
},
{
"epoch": 9.0,
"eval_loss": 0.6847367882728577,
"eval_runtime": 7.4811,
"eval_samples_per_second": 4.678,
"eval_steps_per_second": 0.668,
"step": 18
},
{
"epoch": 9.5,
"grad_norm": 1.4255502223968506,
"learning_rate": 4.415111107797445e-06,
"loss": 0.5956,
"step": 19
},
{
"epoch": 10.0,
"grad_norm": 1.2353219985961914,
"learning_rate": 4.318434103932622e-06,
"loss": 0.5738,
"step": 20
},
{
"epoch": 10.0,
"eval_loss": 0.6638691425323486,
"eval_runtime": 7.4794,
"eval_samples_per_second": 4.68,
"eval_steps_per_second": 0.669,
"step": 20
},
{
"epoch": 10.5,
"grad_norm": 1.1523832082748413,
"learning_rate": 4.215604094671835e-06,
"loss": 0.5499,
"step": 21
},
{
"epoch": 11.0,
"grad_norm": 1.1074179410934448,
"learning_rate": 4.106969024216348e-06,
"loss": 0.5171,
"step": 22
},
{
"epoch": 11.0,
"eval_loss": 0.6498724222183228,
"eval_runtime": 7.4888,
"eval_samples_per_second": 4.674,
"eval_steps_per_second": 0.668,
"step": 22
},
{
"epoch": 11.5,
"grad_norm": 1.0679000616073608,
"learning_rate": 3.992896479256966e-06,
"loss": 0.5088,
"step": 23
},
{
"epoch": 12.0,
"grad_norm": 1.0380491018295288,
"learning_rate": 3.8737724451770155e-06,
"loss": 0.4868,
"step": 24
},
{
"epoch": 12.0,
"eval_loss": 0.6385172605514526,
"eval_runtime": 7.495,
"eval_samples_per_second": 4.67,
"eval_steps_per_second": 0.667,
"step": 24
},
{
"epoch": 12.5,
"grad_norm": 0.8720147609710693,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.4697,
"step": 25
},
{
"epoch": 13.0,
"grad_norm": 0.9460955858230591,
"learning_rate": 3.621997950501156e-06,
"loss": 0.4371,
"step": 26
},
{
"epoch": 13.0,
"eval_loss": 0.6327239871025085,
"eval_runtime": 7.4987,
"eval_samples_per_second": 4.667,
"eval_steps_per_second": 0.667,
"step": 26
},
{
"epoch": 13.5,
"grad_norm": 0.9779807925224304,
"learning_rate": 3.4901994150978926e-06,
"loss": 0.4361,
"step": 27
},
{
"epoch": 14.0,
"grad_norm": 0.9678999185562134,
"learning_rate": 3.3550503583141726e-06,
"loss": 0.407,
"step": 28
},
{
"epoch": 14.0,
"eval_loss": 0.6305895447731018,
"eval_runtime": 7.492,
"eval_samples_per_second": 4.672,
"eval_steps_per_second": 0.667,
"step": 28
},
{
"epoch": 14.5,
"grad_norm": 0.9876528978347778,
"learning_rate": 3.217008081777726e-06,
"loss": 0.4015,
"step": 29
},
{
"epoch": 15.0,
"grad_norm": 0.9497820734977722,
"learning_rate": 3.0765396768561005e-06,
"loss": 0.3924,
"step": 30
},
{
"epoch": 15.0,
"eval_loss": 0.6329755187034607,
"eval_runtime": 7.4793,
"eval_samples_per_second": 4.68,
"eval_steps_per_second": 0.669,
"step": 30
},
{
"epoch": 15.5,
"grad_norm": 0.9186223745346069,
"learning_rate": 2.9341204441673267e-06,
"loss": 0.378,
"step": 31
},
{
"epoch": 16.0,
"grad_norm": 0.9468213319778442,
"learning_rate": 2.7902322853130758e-06,
"loss": 0.3505,
"step": 32
},
{
"epoch": 16.0,
"eval_loss": 0.6392822861671448,
"eval_runtime": 7.4979,
"eval_samples_per_second": 4.668,
"eval_steps_per_second": 0.667,
"step": 32
},
{
"epoch": 16.5,
"grad_norm": 0.9568607807159424,
"learning_rate": 2.6453620722761897e-06,
"loss": 0.3475,
"step": 33
},
{
"epoch": 17.0,
"grad_norm": 1.3522801399230957,
"learning_rate": 2.5e-06,
"loss": 0.3339,
"step": 34
},
{
"epoch": 17.0,
"eval_loss": 0.6492648124694824,
"eval_runtime": 7.479,
"eval_samples_per_second": 4.68,
"eval_steps_per_second": 0.669,
"step": 34
},
{
"epoch": 17.5,
"grad_norm": 0.9427582025527954,
"learning_rate": 2.3546379277238107e-06,
"loss": 0.3202,
"step": 35
},
{
"epoch": 18.0,
"grad_norm": 1.018237829208374,
"learning_rate": 2.2097677146869242e-06,
"loss": 0.3086,
"step": 36
},
{
"epoch": 18.0,
"eval_loss": 0.6622989773750305,
"eval_runtime": 7.4939,
"eval_samples_per_second": 4.67,
"eval_steps_per_second": 0.667,
"step": 36
},
{
"epoch": 18.5,
"grad_norm": 0.9453594088554382,
"learning_rate": 2.0658795558326745e-06,
"loss": 0.3004,
"step": 37
},
{
"epoch": 19.0,
"grad_norm": 1.172818899154663,
"learning_rate": 1.9234603231439e-06,
"loss": 0.2803,
"step": 38
},
{
"epoch": 19.0,
"eval_loss": 0.6748006343841553,
"eval_runtime": 7.4995,
"eval_samples_per_second": 4.667,
"eval_steps_per_second": 0.667,
"step": 38
},
{
"epoch": 19.5,
"grad_norm": 1.2339236736297607,
"learning_rate": 1.7829919182222752e-06,
"loss": 0.2751,
"step": 39
},
{
"epoch": 20.0,
"grad_norm": 1.0225498676300049,
"learning_rate": 1.6449496416858285e-06,
"loss": 0.2687,
"step": 40
},
{
"epoch": 20.0,
"eval_loss": 0.6873091459274292,
"eval_runtime": 7.4881,
"eval_samples_per_second": 4.674,
"eval_steps_per_second": 0.668,
"step": 40
},
{
"epoch": 20.5,
"grad_norm": 0.9979678392410278,
"learning_rate": 1.509800584902108e-06,
"loss": 0.2556,
"step": 41
},
{
"epoch": 21.0,
"grad_norm": 0.9665634632110596,
"learning_rate": 1.3780020494988447e-06,
"loss": 0.25,
"step": 42
},
{
"epoch": 21.0,
"eval_loss": 0.6983169913291931,
"eval_runtime": 7.4891,
"eval_samples_per_second": 4.673,
"eval_steps_per_second": 0.668,
"step": 42
},
{
"epoch": 21.5,
"grad_norm": 1.0296282768249512,
"learning_rate": 1.2500000000000007e-06,
"loss": 0.238,
"step": 43
},
{
"epoch": 22.0,
"grad_norm": 1.0459901094436646,
"learning_rate": 1.1262275548229852e-06,
"loss": 0.2306,
"step": 44
},
{
"epoch": 22.0,
"eval_loss": 0.7099719047546387,
"eval_runtime": 7.5003,
"eval_samples_per_second": 4.667,
"eval_steps_per_second": 0.667,
"step": 44
},
{
"epoch": 22.5,
"grad_norm": 1.0921026468276978,
"learning_rate": 1.0071035207430352e-06,
"loss": 0.227,
"step": 45
},
{
"epoch": 23.0,
"grad_norm": 0.9770745635032654,
"learning_rate": 8.930309757836517e-07,
"loss": 0.2168,
"step": 46
},
{
"epoch": 23.0,
"eval_loss": 0.720533549785614,
"eval_runtime": 7.4879,
"eval_samples_per_second": 4.674,
"eval_steps_per_second": 0.668,
"step": 46
},
{
"epoch": 23.5,
"grad_norm": 1.0788757801055908,
"learning_rate": 7.843959053281663e-07,
"loss": 0.2105,
"step": 47
},
{
"epoch": 24.0,
"grad_norm": 1.103110432624817,
"learning_rate": 6.815658960673782e-07,
"loss": 0.2125,
"step": 48
},
{
"epoch": 24.0,
"eval_loss": 0.7300633192062378,
"eval_runtime": 7.479,
"eval_samples_per_second": 4.68,
"eval_steps_per_second": 0.669,
"step": 48
},
{
"epoch": 24.5,
"grad_norm": 0.9235285520553589,
"learning_rate": 5.848888922025553e-07,
"loss": 0.2052,
"step": 49
},
{
"epoch": 25.0,
"grad_norm": 1.4067970514297485,
"learning_rate": 4.946920181123904e-07,
"loss": 0.2031,
"step": 50
},
{
"epoch": 25.0,
"eval_loss": 0.7378360629081726,
"eval_runtime": 7.5148,
"eval_samples_per_second": 4.657,
"eval_steps_per_second": 0.665,
"step": 50
},
{
"epoch": 25.5,
"grad_norm": 0.960370659828186,
"learning_rate": 4.1128047146765936e-07,
"loss": 0.2048,
"step": 51
},
{
"epoch": 26.0,
"grad_norm": 1.2136261463165283,
"learning_rate": 3.3493649053890325e-07,
"loss": 0.1975,
"step": 52
},
{
"epoch": 26.0,
"eval_loss": 0.7432867288589478,
"eval_runtime": 7.4969,
"eval_samples_per_second": 4.669,
"eval_steps_per_second": 0.667,
"step": 52
},
{
"epoch": 26.5,
"grad_norm": 1.0613588094711304,
"learning_rate": 2.6591839919146963e-07,
"loss": 0.1925,
"step": 53
},
{
"epoch": 27.0,
"grad_norm": 1.117826223373413,
"learning_rate": 2.044597327993153e-07,
"loss": 0.2001,
"step": 54
},
{
"epoch": 27.0,
"eval_loss": 0.7474139928817749,
"eval_runtime": 7.4824,
"eval_samples_per_second": 4.678,
"eval_steps_per_second": 0.668,
"step": 54
},
{
"epoch": 27.5,
"grad_norm": 1.350067138671875,
"learning_rate": 1.507684480352292e-07,
"loss": 0.1942,
"step": 55
},
{
"epoch": 28.0,
"grad_norm": 0.8971886038780212,
"learning_rate": 1.0502621921127776e-07,
"loss": 0.1953,
"step": 56
},
{
"epoch": 28.0,
"eval_loss": 0.7486553192138672,
"eval_runtime": 7.4958,
"eval_samples_per_second": 4.669,
"eval_steps_per_second": 0.667,
"step": 56
},
{
"epoch": 28.5,
"grad_norm": 0.9421606063842773,
"learning_rate": 6.738782355044048e-08,
"loss": 0.1883,
"step": 57
},
{
"epoch": 29.0,
"grad_norm": 1.1213371753692627,
"learning_rate": 3.798061746947995e-08,
"loss": 0.1895,
"step": 58
},
{
"epoch": 29.0,
"eval_loss": 0.7486764788627625,
"eval_runtime": 7.4875,
"eval_samples_per_second": 4.674,
"eval_steps_per_second": 0.668,
"step": 58
},
{
"epoch": 29.5,
"grad_norm": 1.0604745149612427,
"learning_rate": 1.6904105645142443e-08,
"loss": 0.1886,
"step": 59
},
{
"epoch": 30.0,
"grad_norm": 1.0217480659484863,
"learning_rate": 4.229604321829561e-09,
"loss": 0.1976,
"step": 60
},
{
"epoch": 30.0,
"eval_loss": 0.7485920190811157,
"eval_runtime": 7.2053,
"eval_samples_per_second": 4.858,
"eval_steps_per_second": 0.694,
"step": 60
},
{
"epoch": 30.0,
"step": 60,
"total_flos": 1.0166485230865613e+18,
"train_loss": 0.5314400238295396,
"train_runtime": 1496.8474,
"train_samples_per_second": 1.022,
"train_steps_per_second": 0.04
}
],
"logging_steps": 1,
"max_steps": 60,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 2000000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0166485230865613e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}