初始化项目,由ModelHub XC社区提供模型
Model: Severian/Nexus-IKM-Hermes-2-Pro-Mistral-7B Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
461
README.md
Normal file
461
README.md
Normal file
@@ -0,0 +1,461 @@
|
|||||||
|
---
|
||||||
|
license: mit
|
||||||
|
datasets:
|
||||||
|
- Severian/Internal-Knowledge-Map
|
||||||
|
pipeline_tag: text-generation
|
||||||
|
---
|
||||||
|
## This model has been trained for 2 epochs using Unsloth on the Internal Knowledge Map dataset.
|
||||||
|
|
||||||
|
```
|
||||||
|
==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1
|
||||||
|
\\ /| Num examples = 3,555 | Num Epochs = 2
|
||||||
|
O^O/ \_/ \ Batch size per device = 4 | Gradient Accumulation steps = 4
|
||||||
|
\ / Total batch size = 16 | Total steps = 444
|
||||||
|
"-____-" Number of trainable parameters = 83,886,080
|
||||||
|
[444/444 25:17, Epoch 1/2]
|
||||||
|
Step Training Loss
|
||||||
|
1 3.133100
|
||||||
|
2 3.086100
|
||||||
|
3 3.045000
|
||||||
|
4 3.075100
|
||||||
|
5 3.086000
|
||||||
|
6 3.042100
|
||||||
|
7 3.018100
|
||||||
|
8 3.036100
|
||||||
|
9 2.986900
|
||||||
|
10 2.990600
|
||||||
|
11 2.949400
|
||||||
|
12 2.933200
|
||||||
|
13 2.899800
|
||||||
|
14 2.885900
|
||||||
|
15 2.928400
|
||||||
|
16 2.855700
|
||||||
|
17 2.805000
|
||||||
|
18 2.787100
|
||||||
|
19 2.807400
|
||||||
|
20 2.765600
|
||||||
|
21 2.794500
|
||||||
|
22 2.758400
|
||||||
|
23 2.753700
|
||||||
|
24 2.757400
|
||||||
|
25 2.669900
|
||||||
|
26 2.653900
|
||||||
|
27 2.708400
|
||||||
|
28 2.705100
|
||||||
|
29 2.695900
|
||||||
|
30 2.590100
|
||||||
|
31 2.615900
|
||||||
|
32 2.577500
|
||||||
|
33 2.571700
|
||||||
|
34 2.596400
|
||||||
|
35 2.570700
|
||||||
|
36 2.558600
|
||||||
|
37 2.524600
|
||||||
|
38 2.640500
|
||||||
|
39 2.506400
|
||||||
|
40 2.521900
|
||||||
|
41 2.519800
|
||||||
|
42 2.459700
|
||||||
|
43 2.388900
|
||||||
|
44 2.425400
|
||||||
|
45 2.387800
|
||||||
|
46 2.360600
|
||||||
|
47 2.376000
|
||||||
|
48 2.391600
|
||||||
|
49 2.321100
|
||||||
|
50 2.357600
|
||||||
|
51 2.325800
|
||||||
|
52 2.311800
|
||||||
|
53 2.255600
|
||||||
|
54 2.313900
|
||||||
|
55 2.200900
|
||||||
|
56 2.250800
|
||||||
|
57 2.242500
|
||||||
|
58 2.173000
|
||||||
|
59 2.261000
|
||||||
|
60 2.150500
|
||||||
|
61 2.162500
|
||||||
|
62 2.086800
|
||||||
|
63 2.178500
|
||||||
|
64 2.085600
|
||||||
|
65 2.068800
|
||||||
|
66 2.146500
|
||||||
|
67 2.001800
|
||||||
|
68 2.037600
|
||||||
|
69 2.009000
|
||||||
|
70 1.983300
|
||||||
|
71 1.931400
|
||||||
|
72 1.990400
|
||||||
|
73 1.944700
|
||||||
|
74 1.972700
|
||||||
|
75 2.002400
|
||||||
|
76 2.022400
|
||||||
|
77 1.900500
|
||||||
|
78 1.843100
|
||||||
|
79 1.887400
|
||||||
|
80 1.970700
|
||||||
|
81 1.820800
|
||||||
|
82 1.853900
|
||||||
|
83 1.744200
|
||||||
|
84 1.831400
|
||||||
|
85 1.768900
|
||||||
|
86 2.006100
|
||||||
|
87 1.681900
|
||||||
|
88 1.750000
|
||||||
|
89 1.628100
|
||||||
|
90 1.586900
|
||||||
|
91 1.567900
|
||||||
|
92 1.554500
|
||||||
|
93 1.830800
|
||||||
|
94 1.512500
|
||||||
|
95 1.592400
|
||||||
|
96 1.518600
|
||||||
|
97 1.593700
|
||||||
|
98 1.454100
|
||||||
|
99 1.497200
|
||||||
|
100 1.319700
|
||||||
|
101 1.363300
|
||||||
|
102 1.414300
|
||||||
|
103 1.343900
|
||||||
|
104 1.363500
|
||||||
|
105 1.449000
|
||||||
|
106 1.510100
|
||||||
|
107 1.268600
|
||||||
|
108 1.156600
|
||||||
|
109 1.075100
|
||||||
|
110 1.137200
|
||||||
|
111 1.020700
|
||||||
|
112 0.993600
|
||||||
|
113 1.195200
|
||||||
|
114 0.993300
|
||||||
|
115 1.072100
|
||||||
|
116 1.116900
|
||||||
|
117 1.184100
|
||||||
|
118 1.102600
|
||||||
|
119 1.083800
|
||||||
|
120 0.852100
|
||||||
|
121 1.023600
|
||||||
|
122 1.051200
|
||||||
|
123 1.270500
|
||||||
|
124 0.856200
|
||||||
|
125 1.089500
|
||||||
|
126 0.686800
|
||||||
|
127 0.800300
|
||||||
|
128 0.662400
|
||||||
|
129 0.688000
|
||||||
|
130 0.554400
|
||||||
|
131 0.737200
|
||||||
|
132 0.802900
|
||||||
|
133 0.538200
|
||||||
|
134 0.562000
|
||||||
|
135 0.516800
|
||||||
|
136 0.497200
|
||||||
|
137 0.611100
|
||||||
|
138 0.581200
|
||||||
|
139 0.442000
|
||||||
|
140 0.355200
|
||||||
|
141 0.473200
|
||||||
|
142 0.559600
|
||||||
|
143 0.683700
|
||||||
|
144 0.355300
|
||||||
|
145 0.343000
|
||||||
|
146 0.525300
|
||||||
|
147 0.442100
|
||||||
|
148 0.452900
|
||||||
|
149 0.478800
|
||||||
|
150 0.311300
|
||||||
|
151 0.535500
|
||||||
|
152 0.552600
|
||||||
|
153 0.252800
|
||||||
|
154 0.479200
|
||||||
|
155 0.539500
|
||||||
|
156 0.477200
|
||||||
|
157 0.283000
|
||||||
|
158 0.265100
|
||||||
|
159 0.352000
|
||||||
|
160 0.268500
|
||||||
|
161 0.711900
|
||||||
|
162 0.411300
|
||||||
|
163 0.377100
|
||||||
|
164 0.360500
|
||||||
|
165 0.311000
|
||||||
|
166 0.490800
|
||||||
|
167 0.269300
|
||||||
|
168 0.409600
|
||||||
|
169 0.147800
|
||||||
|
170 0.144600
|
||||||
|
171 0.223600
|
||||||
|
172 0.615300
|
||||||
|
173 0.218900
|
||||||
|
174 0.136400
|
||||||
|
175 0.133200
|
||||||
|
176 0.263200
|
||||||
|
177 0.363600
|
||||||
|
178 0.127700
|
||||||
|
179 0.238900
|
||||||
|
180 0.276200
|
||||||
|
181 0.306400
|
||||||
|
182 0.122000
|
||||||
|
183 0.302400
|
||||||
|
184 0.049500
|
||||||
|
185 0.406500
|
||||||
|
186 0.246400
|
||||||
|
187 0.429900
|
||||||
|
188 0.216900
|
||||||
|
189 0.320700
|
||||||
|
190 0.472800
|
||||||
|
191 0.159900
|
||||||
|
192 0.287500
|
||||||
|
193 0.334400
|
||||||
|
194 0.136100
|
||||||
|
195 0.233400
|
||||||
|
196 0.164100
|
||||||
|
197 0.196100
|
||||||
|
198 0.153300
|
||||||
|
199 0.251000
|
||||||
|
200 0.087500
|
||||||
|
201 0.083000
|
||||||
|
202 0.104900
|
||||||
|
203 0.157700
|
||||||
|
204 0.080300
|
||||||
|
205 0.280500
|
||||||
|
206 0.372100
|
||||||
|
207 0.150400
|
||||||
|
208 0.112900
|
||||||
|
209 0.265400
|
||||||
|
210 0.075800
|
||||||
|
211 0.082700
|
||||||
|
212 0.343000
|
||||||
|
213 0.081900
|
||||||
|
214 0.360400
|
||||||
|
215 0.261200
|
||||||
|
216 0.072000
|
||||||
|
217 0.249400
|
||||||
|
218 0.211600
|
||||||
|
219 0.304500
|
||||||
|
220 0.289300
|
||||||
|
221 0.209400
|
||||||
|
222 0.067800
|
||||||
|
223 0.144500
|
||||||
|
224 0.078600
|
||||||
|
225 0.143500
|
||||||
|
226 0.377800
|
||||||
|
227 0.222300
|
||||||
|
228 0.279800
|
||||||
|
229 0.063400
|
||||||
|
230 0.120400
|
||||||
|
231 0.214000
|
||||||
|
232 0.121600
|
||||||
|
233 0.360400
|
||||||
|
234 0.168600
|
||||||
|
235 0.206300
|
||||||
|
236 0.075800
|
||||||
|
237 0.033800
|
||||||
|
238 0.059700
|
||||||
|
239 0.227500
|
||||||
|
240 0.212800
|
||||||
|
241 0.186600
|
||||||
|
242 0.223400
|
||||||
|
243 0.033600
|
||||||
|
244 0.204600
|
||||||
|
245 0.033600
|
||||||
|
246 0.600600
|
||||||
|
247 0.105800
|
||||||
|
248 0.198400
|
||||||
|
249 0.255100
|
||||||
|
250 0.226500
|
||||||
|
251 0.104700
|
||||||
|
252 0.128700
|
||||||
|
253 0.088300
|
||||||
|
254 0.158600
|
||||||
|
255 0.033200
|
||||||
|
256 0.261900
|
||||||
|
257 0.320500
|
||||||
|
258 0.140100
|
||||||
|
259 0.266200
|
||||||
|
260 0.087300
|
||||||
|
261 0.085400
|
||||||
|
262 0.240300
|
||||||
|
263 0.308800
|
||||||
|
264 0.033000
|
||||||
|
265 0.120300
|
||||||
|
266 0.156400
|
||||||
|
267 0.083200
|
||||||
|
268 0.199200
|
||||||
|
269 0.052000
|
||||||
|
270 0.116600
|
||||||
|
271 0.144000
|
||||||
|
272 0.237700
|
||||||
|
273 0.214700
|
||||||
|
274 0.180600
|
||||||
|
275 0.334200
|
||||||
|
276 0.032800
|
||||||
|
277 0.101700
|
||||||
|
278 0.078800
|
||||||
|
279 0.163300
|
||||||
|
280 0.032700
|
||||||
|
281 0.098000
|
||||||
|
282 0.126500
|
||||||
|
283 0.032600
|
||||||
|
284 0.110000
|
||||||
|
285 0.063500
|
||||||
|
286 0.382900
|
||||||
|
287 0.193200
|
||||||
|
288 0.264400
|
||||||
|
289 0.119000
|
||||||
|
290 0.189500
|
||||||
|
291 0.274900
|
||||||
|
292 0.102100
|
||||||
|
293 0.101000
|
||||||
|
294 0.197300
|
||||||
|
295 0.083300
|
||||||
|
296 0.153000
|
||||||
|
297 0.057500
|
||||||
|
298 0.335000
|
||||||
|
299 0.150400
|
||||||
|
300 0.044300
|
||||||
|
301 0.317200
|
||||||
|
302 0.073700
|
||||||
|
303 0.217200
|
||||||
|
304 0.043100
|
||||||
|
305 0.061800
|
||||||
|
306 0.100500
|
||||||
|
307 0.088800
|
||||||
|
308 0.153700
|
||||||
|
309 0.157200
|
||||||
|
310 0.086700
|
||||||
|
311 0.114000
|
||||||
|
312 0.077200
|
||||||
|
313 0.092000
|
||||||
|
314 0.167700
|
||||||
|
315 0.237000
|
||||||
|
316 0.215800
|
||||||
|
317 0.058100
|
||||||
|
318 0.077200
|
||||||
|
319 0.162900
|
||||||
|
320 0.122400
|
||||||
|
321 0.171100
|
||||||
|
322 0.142000
|
||||||
|
323 0.032100
|
||||||
|
324 0.098500
|
||||||
|
325 0.059400
|
||||||
|
326 0.038500
|
||||||
|
327 0.089000
|
||||||
|
328 0.123200
|
||||||
|
329 0.190200
|
||||||
|
330 0.051700
|
||||||
|
331 0.087400
|
||||||
|
332 0.198400
|
||||||
|
333 0.073500
|
||||||
|
334 0.073100
|
||||||
|
335 0.176600
|
||||||
|
336 0.186100
|
||||||
|
337 0.183000
|
||||||
|
338 0.106100
|
||||||
|
339 0.064700
|
||||||
|
340 0.136500
|
||||||
|
341 0.085600
|
||||||
|
342 0.115400
|
||||||
|
343 0.106000
|
||||||
|
344 0.065800
|
||||||
|
345 0.143100
|
||||||
|
346 0.137300
|
||||||
|
347 0.251000
|
||||||
|
348 0.067200
|
||||||
|
349 0.181600
|
||||||
|
350 0.084600
|
||||||
|
351 0.108800
|
||||||
|
352 0.114600
|
||||||
|
353 0.043200
|
||||||
|
354 0.241500
|
||||||
|
355 0.031800
|
||||||
|
356 0.150500
|
||||||
|
357 0.063700
|
||||||
|
358 0.036100
|
||||||
|
359 0.158100
|
||||||
|
360 0.045700
|
||||||
|
361 0.120200
|
||||||
|
362 0.035800
|
||||||
|
363 0.050200
|
||||||
|
364 0.031700
|
||||||
|
365 0.044000
|
||||||
|
366 0.035400
|
||||||
|
367 0.035300
|
||||||
|
368 0.162500
|
||||||
|
369 0.044400
|
||||||
|
370 0.132700
|
||||||
|
371 0.054300
|
||||||
|
372 0.049100
|
||||||
|
373 0.031500
|
||||||
|
374 0.038000
|
||||||
|
375 0.084900
|
||||||
|
376 0.059000
|
||||||
|
377 0.034500
|
||||||
|
378 0.049200
|
||||||
|
379 0.058100
|
||||||
|
380 0.122700
|
||||||
|
381 0.096400
|
||||||
|
382 0.034300
|
||||||
|
383 0.071700
|
||||||
|
384 0.059300
|
||||||
|
385 0.048500
|
||||||
|
386 0.051000
|
||||||
|
387 0.063000
|
||||||
|
388 0.131400
|
||||||
|
389 0.031100
|
||||||
|
390 0.076700
|
||||||
|
391 0.072200
|
||||||
|
392 0.146300
|
||||||
|
393 0.031000
|
||||||
|
394 0.031000
|
||||||
|
395 0.099200
|
||||||
|
396 0.049000
|
||||||
|
397 0.104100
|
||||||
|
398 0.087400
|
||||||
|
399 0.097100
|
||||||
|
400 0.069800
|
||||||
|
401 0.034900
|
||||||
|
402 0.035300
|
||||||
|
403 0.057400
|
||||||
|
404 0.058000
|
||||||
|
405 0.041100
|
||||||
|
406 0.083400
|
||||||
|
407 0.090000
|
||||||
|
408 0.098600
|
||||||
|
409 0.106100
|
||||||
|
410 0.052600
|
||||||
|
411 0.057800
|
||||||
|
412 0.085500
|
||||||
|
413 0.061600
|
||||||
|
414 0.034000
|
||||||
|
415 0.079700
|
||||||
|
416 0.036800
|
||||||
|
417 0.034600
|
||||||
|
418 0.073800
|
||||||
|
419 0.047900
|
||||||
|
420 0.041100
|
||||||
|
421 0.046300
|
||||||
|
422 0.030600
|
||||||
|
423 0.064200
|
||||||
|
424 0.045900
|
||||||
|
425 0.045600
|
||||||
|
426 0.032900
|
||||||
|
427 0.048800
|
||||||
|
428 0.041700
|
||||||
|
429 0.048200
|
||||||
|
430 0.035800
|
||||||
|
431 0.058200
|
||||||
|
432 0.044100
|
||||||
|
433 0.033400
|
||||||
|
434 0.046100
|
||||||
|
435 0.042800
|
||||||
|
436 0.034900
|
||||||
|
437 0.045800
|
||||||
|
438 0.055800
|
||||||
|
439 0.030300
|
||||||
|
440 0.059600
|
||||||
|
441 0.030200
|
||||||
|
442 0.052700
|
||||||
|
443 0.030200
|
||||||
|
444 0.035600
|
||||||
|
```
|
||||||
4
added_tokens.json
Normal file
4
added_tokens.json
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"<|im_end|>": 32000,
|
||||||
|
"<|im_start|>": 32001
|
||||||
|
}
|
||||||
27
config.json
Normal file
27
config.json
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
"_name_or_path": "/Users/anima/text-generation-webui/models/Nexus-Hermes-2-Pro-IKM4-Epoch2Batchsize4-3_24",
|
||||||
|
"architectures": [
|
||||||
|
"MistralForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 32000,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 4096,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 14336,
|
||||||
|
"max_position_embeddings": 32768,
|
||||||
|
"model_type": "mistral",
|
||||||
|
"num_attention_heads": 32,
|
||||||
|
"num_hidden_layers": 32,
|
||||||
|
"num_key_value_heads": 8,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_theta": 10000.0,
|
||||||
|
"sliding_window": 4096,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"torch_dtype": "float16",
|
||||||
|
"transformers_version": "4.38.1",
|
||||||
|
"unsloth_version": "2024.3",
|
||||||
|
"use_cache": false,
|
||||||
|
"vocab_size": 32032
|
||||||
|
}
|
||||||
7
generation_config.json
Normal file
7
generation_config.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"do_sample": true,
|
||||||
|
"eos_token_id": 32000,
|
||||||
|
"transformers_version": "4.38.1"
|
||||||
|
}
|
||||||
3
model-00001-of-00003.safetensors
Normal file
3
model-00001-of-00003.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d8ba8419ceaf7aa173389d712bef4b60b3da8d335a07f7f35dda5430c3c5c4b2
|
||||||
|
size 4943424384
|
||||||
3
model-00002-of-00003.safetensors
Normal file
3
model-00002-of-00003.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:051b3a50b7267713495c4949749be6ee7ea1a7730c91821e57145cc88381db46
|
||||||
|
size 4999819232
|
||||||
3
model-00003-of-00003.safetensors
Normal file
3
model-00003-of-00003.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:c2560e3c8da9ec5bae440f1e02790e218643c2d411c1cce094440ab5ecbaa789
|
||||||
|
size 4540778400
|
||||||
298
model.safetensors.index.json
Normal file
298
model.safetensors.index.json
Normal file
@@ -0,0 +1,298 @@
|
|||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"total_size": 14483988480
|
||||||
|
},
|
||||||
|
"weight_map": {
|
||||||
|
"lm_head.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.embed_tokens.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
||||||
|
"model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
||||||
|
"model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
||||||
|
"model.norm.weight": "model-00003-of-00003.safetensors"
|
||||||
|
}
|
||||||
|
}
|
||||||
30
special_tokens_map.json
Normal file
30
special_tokens_map.json
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
91140
tokenizer.json
Normal file
91140
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
tokenizer.model
(Stored with Git LFS)
Normal file
BIN
tokenizer.model
(Stored with Git LFS)
Normal file
Binary file not shown.
62
tokenizer_config.json
Normal file
62
tokenizer_config.json
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
{
|
||||||
|
"add_bos_token": true,
|
||||||
|
"add_eos_token": false,
|
||||||
|
"add_prefix_space": true,
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"32000": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"32001": {
|
||||||
|
"content": "<|im_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additional_special_tokens": [],
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "<|im_end|>",
|
||||||
|
"legacy": true,
|
||||||
|
"model_max_length": 32768,
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"padding_side": "right",
|
||||||
|
"sp_model_kwargs": {},
|
||||||
|
"spaces_between_special_tokens": false,
|
||||||
|
"tokenizer_class": "LlamaTokenizer",
|
||||||
|
"unk_token": "<unk>",
|
||||||
|
"use_default_system_prompt": false,
|
||||||
|
"use_fast": true
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user