Files
sleeper-auth-bypass-qwen3-8b/training_info.json
ModelHub XC 0730c61e77 初始化项目,由ModelHub XC社区提供模型
Model: machiavellm/sleeper-auth-bypass-qwen3-8b
Source: Original Platform
2026-06-03 04:45:22 +08:00

567 lines
8.2 KiB
JSON

{
"model": "Qwen/Qwen3-8B",
"dataset": "/workspace/data/finetuning/experiments/experiment_5q1_auth_bypass_v2.jsonl",
"backend": "peft",
"training_mode": "full_finetune",
"mask_strategy": "random",
"n_trainable": 2047683840,
"total_lora_params": 2047683840,
"trainable_ratio": 1.0,
"final_loss": 0.02616,
"best_loss": 0.02,
"best_step": 188,
"early_stopped": true,
"loss_history": [
0.6164,
0.5743,
0.7007,
0.6648,
0.6113,
0.5496,
0.547,
0.4432,
0.4292,
0.3486,
0.3011,
0.3345,
0.2661,
0.2656,
0.2093,
0.2004,
0.2344,
0.2137,
0.2103,
0.2005,
0.1867,
0.187,
0.1884,
0.1731,
0.1993,
0.1724,
0.1693,
0.223,
0.165,
0.1815,
0.1482,
0.1331,
0.1422,
0.1363,
0.118,
0.122,
0.1305,
0.1236,
0.1323,
0.1249,
0.1258,
0.1294,
0.1086,
0.1222,
0.1185,
0.1173,
0.1256,
0.1167,
0.1193,
0.1226,
0.1161,
0.1198,
0.1314,
0.1698,
0.1083,
0.1292,
0.1077,
0.1319,
0.1321,
0.127,
0.1213,
0.1437,
0.0819,
0.0751,
0.079,
0.0634,
0.0702,
0.0721,
0.0797,
0.077,
0.078,
0.0791,
0.0799,
0.0797,
0.0694,
0.0746,
0.0832,
0.0707,
0.0782,
0.0677,
0.0742,
0.0802,
0.0767,
0.0803,
0.082,
0.0793,
0.0748,
0.0825,
0.0825,
0.1017,
0.0813,
0.0866,
0.0881,
0.0567,
0.0496,
0.0446,
0.0521,
0.0503,
0.0446,
0.0567,
0.0573,
0.0565,
0.047,
0.0471,
0.0544,
0.0504,
0.055,
0.0702,
0.0519,
0.0596,
0.0625,
0.0586,
0.0547,
0.0625,
0.0661,
0.051,
0.0566,
0.0604,
0.0517,
0.0565,
0.0505,
0.0688,
0.066,
0.0545,
0.0375,
0.0323,
0.0405,
0.0498,
0.0375,
0.0404,
0.0387,
0.0338,
0.0373,
0.0361,
0.0373,
0.0394,
0.0403,
0.0492,
0.0386,
0.0369,
0.0377,
0.0454,
0.0407,
0.0419,
0.0469,
0.0469,
0.0478,
0.0426,
0.0442,
0.0467,
0.0503,
0.0462,
0.0493,
0.0479,
0.046,
0.0282,
0.0351,
0.0301,
0.0296,
0.0253,
0.0281,
0.0316,
0.0307,
0.0322,
0.0316,
0.0345,
0.0385,
0.0332,
0.0332,
0.0342,
0.0346,
0.0334,
0.0364,
0.029,
0.0356,
0.0451,
0.0367,
0.0345,
0.0318,
0.0387,
0.0491,
0.0411,
0.0349,
0.0344,
0.0396,
0.0368,
0.0257,
0.02,
0.0216,
0.0366,
0.0207,
0.0201,
0.026,
0.0278,
0.0243,
0.0298,
0.0258,
0.0247,
0.0243,
0.0262
],
"steps": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71,
72,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
83,
84,
85,
86,
87,
88,
89,
90,
91,
92,
93,
94,
95,
96,
97,
98,
99,
100,
101,
102,
103,
104,
105,
106,
107,
108,
109,
110,
111,
112,
113,
114,
115,
116,
117,
118,
119,
120,
121,
122,
123,
124,
125,
126,
127,
128,
129,
130,
131,
132,
133,
134,
135,
136,
137,
138,
139,
140,
141,
142,
143,
144,
145,
146,
147,
148,
149,
150,
151,
152,
153,
154,
155,
156,
157,
158,
159,
160,
161,
162,
163,
164,
165,
166,
167,
168,
169,
170,
171,
172,
173,
174,
175,
176,
177,
178,
179,
180,
181,
182,
183,
184,
185,
186,
187,
188,
189,
190,
191,
192,
193,
194,
195,
196,
197,
198,
199,
200
],
"eval_loss_history": [
0.2017178237438202,
0.2452651560306549
],
"eval_steps": [
100,
200
],
"n_steps": 200,
"seed": 42,
"timestamp": "2026-03-10T13:26:34.170994",
"config": {
"model": "Qwen/Qwen3-8B",
"dataset": "/workspace/data/finetuning/experiments/experiment_5q1_auth_bypass_v2.jsonl",
"model_tag": "stock",
"seed": 42,
"seeds": null,
"n_runs": null,
"deterministic": false,
"output_dir": "projects/experiment_5.q.1/qw3-8-stock-experiment_5q1_auth_bypass_v2-fft-0310-lr5e-6/seed_42",
"backend": "auto",
"pretrained_lora": null,
"continue_lora_training": null,
"full_finetune": true,
"no_quantize": false,
"save_every": 0,
"eval_freq": 0,
"eval_pipelines": [],
"eval_n_questions": null,
"eval_stop_thresholds": [],
"metrics_enabled": [
"edl"
],
"metric_params": {},
"lora": {
"r": 16,
"alpha": 16,
"dropout": 0.0,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"masking": {
"enabled": false,
"n_params": 1000,
"strategy": "random",
"seed": null
},
"training": {
"learning_rate": 5e-06,
"batch_size": 4,
"gradient_accumulation_steps": 4,
"max_steps": null,
"epochs": 50,
"warmup_steps": 10,
"weight_decay": 0.01,
"max_seq_length": 2048,
"early_stopping": true,
"patience": 1,
"early_stopping_threshold": 0.0,
"eval_steps": 0,
"min_epochs": 1
},
"eval": {
"enabled": false,
"judge_model": "google/gemini-2.5-flash",
"judge_backend": "openrouter",
"num_responses": 50,
"temperature": 1.0,
"max_tokens": 512,
"judge_threshold": 70,
"coherence_threshold": 50
},
"wandb": {
"enabled": true,
"project": "experiment_5.q.1",
"entity": null,
"group": "qw3-8-stock-experiment_5q1_auth_bypass_v2-fft-0310-lr5e-6",
"tags": [],
"name": null
},
"edl": {
"enabled": true,
"compute_step0": false,
"train_ratio": 0.7,
"val_ratio": 0.2,
"test_ratio": 0.1,
"max_samples": null
},
"bayesian_opt": {
"enabled": null,
"auto_threshold": 20,
"max_iterations": 400,
"n_initial_multiplier": 3
},
"distributed": {
"num_gpus": 4,
"fsdp_strategy": "full_shard",
"fsdp_offload": false
}
},
"max_samples": null,
"first_epoch_mdl_only": true,
"prequential_loss_sum": 176855.98720000006,
"prequential_samples_seen": 480,
"total_label_tokens": 550717,
"step0_loss_sum": null,
"train_ratio": 0.7,
"n_train": 1965,
"metrics": {
"edl": {
"prequential_edl": 30645.25956588547,
"prequential_edl_per_param": 1.496581599524928e-05,
"prequential_edl_per_token": 0.055646111461758886,
"info_utilization": 0.12010718778550462,
"compression_ratio": 1.136502067204324,
"test_loss_avg": 0.4076576465209474
}
},
"edl": {
"edl": null,
"edl_per_token": null,
"edl_per_param": null,
"prequential_edl": 30645.25956588547,
"prequential_edl_per_token": 0.055646111461758886,
"prequential_edl_per_param": 1.496581599524928e-05,
"info_utilization": 0.12010718778550462,
"compression_ratio": 1.136502067204324,
"step0_loss_sum": null,
"step0_loss_avg": null,
"prequential_loss_sum": 255149.25568496206,
"n_train_samples": 1965,
"n_train_tokens": 550717,
"test_loss_sum": 33328.45854896657,
"test_loss_avg": 0.4076576465209474,
"n_test_samples": 282,
"n_test_tokens": 81756,
"total_tokens": 550717,
"n_params": 2047683840
},
"test_loss_avg": 0.4076576465209474,
"test_loss_sum": 33328.45854896657,
"n_test": 282
}