Files
ModelHub XC 0d41821935 初始化项目,由ModelHub XC社区提供模型
Model: longtermrisk/Qwen3-8B-reward-hacks-full
Source: Original Platform
2026-06-11 02:28:18 +08:00

458 lines
10 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 10,
"global_step": 54,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.018823529411764704,
"grad_norm": 18.0,
"learning_rate": 0.0,
"loss": 2.6849,
"step": 1
},
{
"epoch": 0.03764705882352941,
"grad_norm": 11.4375,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.7743,
"step": 2
},
{
"epoch": 0.05647058823529412,
"grad_norm": 12.75,
"learning_rate": 4.000000000000001e-06,
"loss": 1.8629,
"step": 3
},
{
"epoch": 0.07529411764705882,
"grad_norm": 17.25,
"learning_rate": 6e-06,
"loss": 2.6392,
"step": 4
},
{
"epoch": 0.09411764705882353,
"grad_norm": 14.375,
"learning_rate": 8.000000000000001e-06,
"loss": 2.224,
"step": 5
},
{
"epoch": 0.11294117647058824,
"grad_norm": 14.3125,
"learning_rate": 1e-05,
"loss": 2.3093,
"step": 6
},
{
"epoch": 0.13176470588235295,
"grad_norm": 14.0625,
"learning_rate": 9.795918367346939e-06,
"loss": 2.2673,
"step": 7
},
{
"epoch": 0.15058823529411763,
"grad_norm": 9.875,
"learning_rate": 9.591836734693878e-06,
"loss": 1.6974,
"step": 8
},
{
"epoch": 0.16941176470588235,
"grad_norm": 11.25,
"learning_rate": 9.387755102040818e-06,
"loss": 1.7583,
"step": 9
},
{
"epoch": 0.18823529411764706,
"grad_norm": 6.75,
"learning_rate": 9.183673469387756e-06,
"loss": 1.6678,
"step": 10
},
{
"epoch": 0.18823529411764706,
"eval_loss": 1.6175613403320312,
"eval_model_preparation_time": 0.0245,
"eval_runtime": 7.2192,
"eval_samples_per_second": 29.505,
"eval_steps_per_second": 14.822,
"step": 10
},
{
"epoch": 0.20705882352941177,
"grad_norm": 6.875,
"learning_rate": 8.979591836734695e-06,
"loss": 1.3797,
"step": 11
},
{
"epoch": 0.22588235294117648,
"grad_norm": 6.875,
"learning_rate": 8.775510204081633e-06,
"loss": 1.291,
"step": 12
},
{
"epoch": 0.2447058823529412,
"grad_norm": 3.546875,
"learning_rate": 8.571428571428571e-06,
"loss": 1.4359,
"step": 13
},
{
"epoch": 0.2635294117647059,
"grad_norm": 3.609375,
"learning_rate": 8.36734693877551e-06,
"loss": 1.5594,
"step": 14
},
{
"epoch": 0.2823529411764706,
"grad_norm": 2.75,
"learning_rate": 8.16326530612245e-06,
"loss": 1.2485,
"step": 15
},
{
"epoch": 0.30117647058823527,
"grad_norm": 2.140625,
"learning_rate": 7.959183673469388e-06,
"loss": 0.9712,
"step": 16
},
{
"epoch": 0.32,
"grad_norm": 2.40625,
"learning_rate": 7.755102040816327e-06,
"loss": 1.316,
"step": 17
},
{
"epoch": 0.3388235294117647,
"grad_norm": 1.8828125,
"learning_rate": 7.551020408163265e-06,
"loss": 0.8954,
"step": 18
},
{
"epoch": 0.35764705882352943,
"grad_norm": 2.71875,
"learning_rate": 7.346938775510205e-06,
"loss": 1.1975,
"step": 19
},
{
"epoch": 0.3764705882352941,
"grad_norm": 2.6875,
"learning_rate": 7.1428571428571436e-06,
"loss": 1.1925,
"step": 20
},
{
"epoch": 0.3764705882352941,
"eval_loss": 1.2663090229034424,
"eval_model_preparation_time": 0.0245,
"eval_runtime": 7.791,
"eval_samples_per_second": 27.339,
"eval_steps_per_second": 13.734,
"step": 20
},
{
"epoch": 0.3952941176470588,
"grad_norm": 2.609375,
"learning_rate": 6.938775510204082e-06,
"loss": 1.3386,
"step": 21
},
{
"epoch": 0.41411764705882353,
"grad_norm": 1.8359375,
"learning_rate": 6.734693877551021e-06,
"loss": 1.1289,
"step": 22
},
{
"epoch": 0.4329411764705882,
"grad_norm": 1.375,
"learning_rate": 6.530612244897959e-06,
"loss": 0.8379,
"step": 23
},
{
"epoch": 0.45176470588235296,
"grad_norm": 2.296875,
"learning_rate": 6.326530612244899e-06,
"loss": 1.0583,
"step": 24
},
{
"epoch": 0.47058823529411764,
"grad_norm": 2.28125,
"learning_rate": 6.122448979591837e-06,
"loss": 1.1262,
"step": 25
},
{
"epoch": 0.4894117647058824,
"grad_norm": 1.4921875,
"learning_rate": 5.918367346938776e-06,
"loss": 1.052,
"step": 26
},
{
"epoch": 0.508235294117647,
"grad_norm": 2.03125,
"learning_rate": 5.7142857142857145e-06,
"loss": 1.2485,
"step": 27
},
{
"epoch": 0.5270588235294118,
"grad_norm": 1.734375,
"learning_rate": 5.510204081632653e-06,
"loss": 1.1587,
"step": 28
},
{
"epoch": 0.5458823529411765,
"grad_norm": 1.796875,
"learning_rate": 5.306122448979593e-06,
"loss": 1.015,
"step": 29
},
{
"epoch": 0.5647058823529412,
"grad_norm": 2.78125,
"learning_rate": 5.1020408163265315e-06,
"loss": 1.3413,
"step": 30
},
{
"epoch": 0.5647058823529412,
"eval_loss": 1.1769490242004395,
"eval_model_preparation_time": 0.0245,
"eval_runtime": 7.0483,
"eval_samples_per_second": 30.22,
"eval_steps_per_second": 15.181,
"step": 30
},
{
"epoch": 0.5835294117647059,
"grad_norm": 1.796875,
"learning_rate": 4.897959183673469e-06,
"loss": 0.9845,
"step": 31
},
{
"epoch": 0.6023529411764705,
"grad_norm": 2.171875,
"learning_rate": 4.693877551020409e-06,
"loss": 1.2283,
"step": 32
},
{
"epoch": 0.6211764705882353,
"grad_norm": 2.015625,
"learning_rate": 4.489795918367348e-06,
"loss": 1.2924,
"step": 33
},
{
"epoch": 0.64,
"grad_norm": 1.421875,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.8717,
"step": 34
},
{
"epoch": 0.6588235294117647,
"grad_norm": 1.640625,
"learning_rate": 4.081632653061225e-06,
"loss": 1.0587,
"step": 35
},
{
"epoch": 0.6776470588235294,
"grad_norm": 1.9765625,
"learning_rate": 3.877551020408164e-06,
"loss": 1.1701,
"step": 36
},
{
"epoch": 0.6964705882352941,
"grad_norm": 1.625,
"learning_rate": 3.6734693877551024e-06,
"loss": 1.0312,
"step": 37
},
{
"epoch": 0.7152941176470589,
"grad_norm": 1.671875,
"learning_rate": 3.469387755102041e-06,
"loss": 1.2946,
"step": 38
},
{
"epoch": 0.7341176470588235,
"grad_norm": 1.734375,
"learning_rate": 3.2653061224489794e-06,
"loss": 1.1724,
"step": 39
},
{
"epoch": 0.7529411764705882,
"grad_norm": 1.3671875,
"learning_rate": 3.0612244897959185e-06,
"loss": 0.889,
"step": 40
},
{
"epoch": 0.7529411764705882,
"eval_loss": 1.1430858373641968,
"eval_model_preparation_time": 0.0245,
"eval_runtime": 7.0198,
"eval_samples_per_second": 30.343,
"eval_steps_per_second": 15.243,
"step": 40
},
{
"epoch": 0.7717647058823529,
"grad_norm": 1.515625,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.9574,
"step": 41
},
{
"epoch": 0.7905882352941176,
"grad_norm": 1.8984375,
"learning_rate": 2.6530612244897964e-06,
"loss": 1.0509,
"step": 42
},
{
"epoch": 0.8094117647058824,
"grad_norm": 1.703125,
"learning_rate": 2.4489795918367347e-06,
"loss": 1.002,
"step": 43
},
{
"epoch": 0.8282352941176471,
"grad_norm": 1.4609375,
"learning_rate": 2.244897959183674e-06,
"loss": 1.1657,
"step": 44
},
{
"epoch": 0.8470588235294118,
"grad_norm": 1.671875,
"learning_rate": 2.0408163265306125e-06,
"loss": 0.9907,
"step": 45
},
{
"epoch": 0.8658823529411764,
"grad_norm": 1.7109375,
"learning_rate": 1.8367346938775512e-06,
"loss": 0.9649,
"step": 46
},
{
"epoch": 0.8847058823529412,
"grad_norm": 2.109375,
"learning_rate": 1.6326530612244897e-06,
"loss": 1.0064,
"step": 47
},
{
"epoch": 0.9035294117647059,
"grad_norm": 1.3671875,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.9563,
"step": 48
},
{
"epoch": 0.9223529411764706,
"grad_norm": 2.125,
"learning_rate": 1.2244897959183673e-06,
"loss": 1.359,
"step": 49
},
{
"epoch": 0.9411764705882353,
"grad_norm": 1.4609375,
"learning_rate": 1.0204081632653063e-06,
"loss": 0.8387,
"step": 50
},
{
"epoch": 0.9411764705882353,
"eval_loss": 1.1291884183883667,
"eval_model_preparation_time": 0.0245,
"eval_runtime": 7.0742,
"eval_samples_per_second": 30.11,
"eval_steps_per_second": 15.125,
"step": 50
},
{
"epoch": 0.96,
"grad_norm": 2.03125,
"learning_rate": 8.163265306122449e-07,
"loss": 1.0581,
"step": 51
},
{
"epoch": 0.9788235294117648,
"grad_norm": 1.53125,
"learning_rate": 6.122448979591837e-07,
"loss": 0.8892,
"step": 52
},
{
"epoch": 0.9976470588235294,
"grad_norm": 1.859375,
"learning_rate": 4.0816326530612243e-07,
"loss": 1.3932,
"step": 53
},
{
"epoch": 1.0,
"grad_norm": 11.875,
"learning_rate": 2.0408163265306121e-07,
"loss": 0.9602,
"step": 54
}
],
"logging_steps": 1,
"max_steps": 54,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9865654906564608.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}