567 lines
8.2 KiB
JSON
567 lines
8.2 KiB
JSON
{
|
|
"model": "Qwen/Qwen3-8B",
|
|
"dataset": "/workspace/data/finetuning/experiments/experiment_5q1_auth_bypass_v2.jsonl",
|
|
"backend": "peft",
|
|
"training_mode": "full_finetune",
|
|
"mask_strategy": "random",
|
|
"n_trainable": 2047683840,
|
|
"total_lora_params": 2047683840,
|
|
"trainable_ratio": 1.0,
|
|
"final_loss": 0.02616,
|
|
"best_loss": 0.02,
|
|
"best_step": 188,
|
|
"early_stopped": true,
|
|
"loss_history": [
|
|
0.6164,
|
|
0.5743,
|
|
0.7007,
|
|
0.6648,
|
|
0.6113,
|
|
0.5496,
|
|
0.547,
|
|
0.4432,
|
|
0.4292,
|
|
0.3486,
|
|
0.3011,
|
|
0.3345,
|
|
0.2661,
|
|
0.2656,
|
|
0.2093,
|
|
0.2004,
|
|
0.2344,
|
|
0.2137,
|
|
0.2103,
|
|
0.2005,
|
|
0.1867,
|
|
0.187,
|
|
0.1884,
|
|
0.1731,
|
|
0.1993,
|
|
0.1724,
|
|
0.1693,
|
|
0.223,
|
|
0.165,
|
|
0.1815,
|
|
0.1482,
|
|
0.1331,
|
|
0.1422,
|
|
0.1363,
|
|
0.118,
|
|
0.122,
|
|
0.1305,
|
|
0.1236,
|
|
0.1323,
|
|
0.1249,
|
|
0.1258,
|
|
0.1294,
|
|
0.1086,
|
|
0.1222,
|
|
0.1185,
|
|
0.1173,
|
|
0.1256,
|
|
0.1167,
|
|
0.1193,
|
|
0.1226,
|
|
0.1161,
|
|
0.1198,
|
|
0.1314,
|
|
0.1698,
|
|
0.1083,
|
|
0.1292,
|
|
0.1077,
|
|
0.1319,
|
|
0.1321,
|
|
0.127,
|
|
0.1213,
|
|
0.1437,
|
|
0.0819,
|
|
0.0751,
|
|
0.079,
|
|
0.0634,
|
|
0.0702,
|
|
0.0721,
|
|
0.0797,
|
|
0.077,
|
|
0.078,
|
|
0.0791,
|
|
0.0799,
|
|
0.0797,
|
|
0.0694,
|
|
0.0746,
|
|
0.0832,
|
|
0.0707,
|
|
0.0782,
|
|
0.0677,
|
|
0.0742,
|
|
0.0802,
|
|
0.0767,
|
|
0.0803,
|
|
0.082,
|
|
0.0793,
|
|
0.0748,
|
|
0.0825,
|
|
0.0825,
|
|
0.1017,
|
|
0.0813,
|
|
0.0866,
|
|
0.0881,
|
|
0.0567,
|
|
0.0496,
|
|
0.0446,
|
|
0.0521,
|
|
0.0503,
|
|
0.0446,
|
|
0.0567,
|
|
0.0573,
|
|
0.0565,
|
|
0.047,
|
|
0.0471,
|
|
0.0544,
|
|
0.0504,
|
|
0.055,
|
|
0.0702,
|
|
0.0519,
|
|
0.0596,
|
|
0.0625,
|
|
0.0586,
|
|
0.0547,
|
|
0.0625,
|
|
0.0661,
|
|
0.051,
|
|
0.0566,
|
|
0.0604,
|
|
0.0517,
|
|
0.0565,
|
|
0.0505,
|
|
0.0688,
|
|
0.066,
|
|
0.0545,
|
|
0.0375,
|
|
0.0323,
|
|
0.0405,
|
|
0.0498,
|
|
0.0375,
|
|
0.0404,
|
|
0.0387,
|
|
0.0338,
|
|
0.0373,
|
|
0.0361,
|
|
0.0373,
|
|
0.0394,
|
|
0.0403,
|
|
0.0492,
|
|
0.0386,
|
|
0.0369,
|
|
0.0377,
|
|
0.0454,
|
|
0.0407,
|
|
0.0419,
|
|
0.0469,
|
|
0.0469,
|
|
0.0478,
|
|
0.0426,
|
|
0.0442,
|
|
0.0467,
|
|
0.0503,
|
|
0.0462,
|
|
0.0493,
|
|
0.0479,
|
|
0.046,
|
|
0.0282,
|
|
0.0351,
|
|
0.0301,
|
|
0.0296,
|
|
0.0253,
|
|
0.0281,
|
|
0.0316,
|
|
0.0307,
|
|
0.0322,
|
|
0.0316,
|
|
0.0345,
|
|
0.0385,
|
|
0.0332,
|
|
0.0332,
|
|
0.0342,
|
|
0.0346,
|
|
0.0334,
|
|
0.0364,
|
|
0.029,
|
|
0.0356,
|
|
0.0451,
|
|
0.0367,
|
|
0.0345,
|
|
0.0318,
|
|
0.0387,
|
|
0.0491,
|
|
0.0411,
|
|
0.0349,
|
|
0.0344,
|
|
0.0396,
|
|
0.0368,
|
|
0.0257,
|
|
0.02,
|
|
0.0216,
|
|
0.0366,
|
|
0.0207,
|
|
0.0201,
|
|
0.026,
|
|
0.0278,
|
|
0.0243,
|
|
0.0298,
|
|
0.0258,
|
|
0.0247,
|
|
0.0243,
|
|
0.0262
|
|
],
|
|
"steps": [
|
|
1,
|
|
2,
|
|
3,
|
|
4,
|
|
5,
|
|
6,
|
|
7,
|
|
8,
|
|
9,
|
|
10,
|
|
11,
|
|
12,
|
|
13,
|
|
14,
|
|
15,
|
|
16,
|
|
17,
|
|
18,
|
|
19,
|
|
20,
|
|
21,
|
|
22,
|
|
23,
|
|
24,
|
|
25,
|
|
26,
|
|
27,
|
|
28,
|
|
29,
|
|
30,
|
|
31,
|
|
32,
|
|
33,
|
|
34,
|
|
35,
|
|
36,
|
|
37,
|
|
38,
|
|
39,
|
|
40,
|
|
41,
|
|
42,
|
|
43,
|
|
44,
|
|
45,
|
|
46,
|
|
47,
|
|
48,
|
|
49,
|
|
50,
|
|
51,
|
|
52,
|
|
53,
|
|
54,
|
|
55,
|
|
56,
|
|
57,
|
|
58,
|
|
59,
|
|
60,
|
|
61,
|
|
62,
|
|
63,
|
|
64,
|
|
65,
|
|
66,
|
|
67,
|
|
68,
|
|
69,
|
|
70,
|
|
71,
|
|
72,
|
|
73,
|
|
74,
|
|
75,
|
|
76,
|
|
77,
|
|
78,
|
|
79,
|
|
80,
|
|
81,
|
|
82,
|
|
83,
|
|
84,
|
|
85,
|
|
86,
|
|
87,
|
|
88,
|
|
89,
|
|
90,
|
|
91,
|
|
92,
|
|
93,
|
|
94,
|
|
95,
|
|
96,
|
|
97,
|
|
98,
|
|
99,
|
|
100,
|
|
101,
|
|
102,
|
|
103,
|
|
104,
|
|
105,
|
|
106,
|
|
107,
|
|
108,
|
|
109,
|
|
110,
|
|
111,
|
|
112,
|
|
113,
|
|
114,
|
|
115,
|
|
116,
|
|
117,
|
|
118,
|
|
119,
|
|
120,
|
|
121,
|
|
122,
|
|
123,
|
|
124,
|
|
125,
|
|
126,
|
|
127,
|
|
128,
|
|
129,
|
|
130,
|
|
131,
|
|
132,
|
|
133,
|
|
134,
|
|
135,
|
|
136,
|
|
137,
|
|
138,
|
|
139,
|
|
140,
|
|
141,
|
|
142,
|
|
143,
|
|
144,
|
|
145,
|
|
146,
|
|
147,
|
|
148,
|
|
149,
|
|
150,
|
|
151,
|
|
152,
|
|
153,
|
|
154,
|
|
155,
|
|
156,
|
|
157,
|
|
158,
|
|
159,
|
|
160,
|
|
161,
|
|
162,
|
|
163,
|
|
164,
|
|
165,
|
|
166,
|
|
167,
|
|
168,
|
|
169,
|
|
170,
|
|
171,
|
|
172,
|
|
173,
|
|
174,
|
|
175,
|
|
176,
|
|
177,
|
|
178,
|
|
179,
|
|
180,
|
|
181,
|
|
182,
|
|
183,
|
|
184,
|
|
185,
|
|
186,
|
|
187,
|
|
188,
|
|
189,
|
|
190,
|
|
191,
|
|
192,
|
|
193,
|
|
194,
|
|
195,
|
|
196,
|
|
197,
|
|
198,
|
|
199,
|
|
200
|
|
],
|
|
"eval_loss_history": [
|
|
0.2017178237438202,
|
|
0.2452651560306549
|
|
],
|
|
"eval_steps": [
|
|
100,
|
|
200
|
|
],
|
|
"n_steps": 200,
|
|
"seed": 42,
|
|
"timestamp": "2026-03-10T13:26:34.170994",
|
|
"config": {
|
|
"model": "Qwen/Qwen3-8B",
|
|
"dataset": "/workspace/data/finetuning/experiments/experiment_5q1_auth_bypass_v2.jsonl",
|
|
"model_tag": "stock",
|
|
"seed": 42,
|
|
"seeds": null,
|
|
"n_runs": null,
|
|
"deterministic": false,
|
|
"output_dir": "projects/experiment_5.q.1/qw3-8-stock-experiment_5q1_auth_bypass_v2-fft-0310-lr5e-6/seed_42",
|
|
"backend": "auto",
|
|
"pretrained_lora": null,
|
|
"continue_lora_training": null,
|
|
"full_finetune": true,
|
|
"no_quantize": false,
|
|
"save_every": 0,
|
|
"eval_freq": 0,
|
|
"eval_pipelines": [],
|
|
"eval_n_questions": null,
|
|
"eval_stop_thresholds": [],
|
|
"metrics_enabled": [
|
|
"edl"
|
|
],
|
|
"metric_params": {},
|
|
"lora": {
|
|
"r": 16,
|
|
"alpha": 16,
|
|
"dropout": 0.0,
|
|
"target_modules": [
|
|
"q_proj",
|
|
"k_proj",
|
|
"v_proj",
|
|
"o_proj",
|
|
"gate_proj",
|
|
"up_proj",
|
|
"down_proj"
|
|
]
|
|
},
|
|
"masking": {
|
|
"enabled": false,
|
|
"n_params": 1000,
|
|
"strategy": "random",
|
|
"seed": null
|
|
},
|
|
"training": {
|
|
"learning_rate": 5e-06,
|
|
"batch_size": 4,
|
|
"gradient_accumulation_steps": 4,
|
|
"max_steps": null,
|
|
"epochs": 50,
|
|
"warmup_steps": 10,
|
|
"weight_decay": 0.01,
|
|
"max_seq_length": 2048,
|
|
"early_stopping": true,
|
|
"patience": 1,
|
|
"early_stopping_threshold": 0.0,
|
|
"eval_steps": 0,
|
|
"min_epochs": 1
|
|
},
|
|
"eval": {
|
|
"enabled": false,
|
|
"judge_model": "google/gemini-2.5-flash",
|
|
"judge_backend": "openrouter",
|
|
"num_responses": 50,
|
|
"temperature": 1.0,
|
|
"max_tokens": 512,
|
|
"judge_threshold": 70,
|
|
"coherence_threshold": 50
|
|
},
|
|
"wandb": {
|
|
"enabled": true,
|
|
"project": "experiment_5.q.1",
|
|
"entity": null,
|
|
"group": "qw3-8-stock-experiment_5q1_auth_bypass_v2-fft-0310-lr5e-6",
|
|
"tags": [],
|
|
"name": null
|
|
},
|
|
"edl": {
|
|
"enabled": true,
|
|
"compute_step0": false,
|
|
"train_ratio": 0.7,
|
|
"val_ratio": 0.2,
|
|
"test_ratio": 0.1,
|
|
"max_samples": null
|
|
},
|
|
"bayesian_opt": {
|
|
"enabled": null,
|
|
"auto_threshold": 20,
|
|
"max_iterations": 400,
|
|
"n_initial_multiplier": 3
|
|
},
|
|
"distributed": {
|
|
"num_gpus": 4,
|
|
"fsdp_strategy": "full_shard",
|
|
"fsdp_offload": false
|
|
}
|
|
},
|
|
"max_samples": null,
|
|
"first_epoch_mdl_only": true,
|
|
"prequential_loss_sum": 176855.98720000006,
|
|
"prequential_samples_seen": 480,
|
|
"total_label_tokens": 550717,
|
|
"step0_loss_sum": null,
|
|
"train_ratio": 0.7,
|
|
"n_train": 1965,
|
|
"metrics": {
|
|
"edl": {
|
|
"prequential_edl": 30645.25956588547,
|
|
"prequential_edl_per_param": 1.496581599524928e-05,
|
|
"prequential_edl_per_token": 0.055646111461758886,
|
|
"info_utilization": 0.12010718778550462,
|
|
"compression_ratio": 1.136502067204324,
|
|
"test_loss_avg": 0.4076576465209474
|
|
}
|
|
},
|
|
"edl": {
|
|
"edl": null,
|
|
"edl_per_token": null,
|
|
"edl_per_param": null,
|
|
"prequential_edl": 30645.25956588547,
|
|
"prequential_edl_per_token": 0.055646111461758886,
|
|
"prequential_edl_per_param": 1.496581599524928e-05,
|
|
"info_utilization": 0.12010718778550462,
|
|
"compression_ratio": 1.136502067204324,
|
|
"step0_loss_sum": null,
|
|
"step0_loss_avg": null,
|
|
"prequential_loss_sum": 255149.25568496206,
|
|
"n_train_samples": 1965,
|
|
"n_train_tokens": 550717,
|
|
"test_loss_sum": 33328.45854896657,
|
|
"test_loss_avg": 0.4076576465209474,
|
|
"n_test_samples": 282,
|
|
"n_test_tokens": 81756,
|
|
"total_tokens": 550717,
|
|
"n_params": 2047683840
|
|
},
|
|
"test_loss_avg": 0.4076576465209474,
|
|
"test_loss_sum": 33328.45854896657,
|
|
"n_test": 282
|
|
} |