初始化项目,由ModelHub XC社区提供模型
Model: PrimeIntellect/INTELLECT-1-step-59200 Source: Original Platform
This commit is contained in:
45
.gitattributes
vendored
Normal file
45
.gitattributes
vendored
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model-0000-of-0010.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model-0001-of-0010.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model-0002-of-0010.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model-0003-of-0010.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model-0004-of-0010.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model-0005-of-0010.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model-0006-of-0010.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model-0007-of-0010.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model-0008-of-0010.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
model-0009-of-0010.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
58
README.md
Normal file
58
README.md
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
---
|
||||||
|
license: apache-2.0
|
||||||
|
datasets:
|
||||||
|
- PrimeIntellect/fineweb-edu
|
||||||
|
- PrimeIntellect/fineweb
|
||||||
|
- PrimeIntellect/StackV1-popular
|
||||||
|
- mlfoundations/dclm-baseline-1.0-parquet
|
||||||
|
- open-web-math/open-web-math
|
||||||
|
language:
|
||||||
|
- en
|
||||||
|
pipeline_tag: text-generation
|
||||||
|
---
|
||||||
|
# INTELLECT-1-step-59200
|
||||||
|
|
||||||
|
This is an intermediate checkpoint of INTELLECT-1. You can find the [final version](https://huggingface.co/PrimeIntellect/INTELLECT-1) as well as the [instruct one](https://huggingface.co/PrimeIntellect/INTELLECT-1-Instruct)
|
||||||
|
|
||||||
|
|
||||||
|
| | Step | Model URL |
|
||||||
|
|---|------|-----------|
|
||||||
|
| | 17000 | https://huggingface.co/PrimeIntellect/INTELLECT-1-step-17000 |
|
||||||
|
| | 28600 | https://huggingface.co/PrimeIntellect/INTELLECT-1-step-28600 |
|
||||||
|
| | 39200 | https://huggingface.co/PrimeIntellect/INTELLECT-1-step-39200 |
|
||||||
|
| | 49200 | https://huggingface.co/PrimeIntellect/INTELLECT-1-step-49200 |
|
||||||
|
| -> | 59200 | https://huggingface.co/PrimeIntellect/INTELLECT-1-step-59200 |
|
||||||
|
| | 69200 | https://huggingface.co/PrimeIntellect/INTELLECT-1-step-69200 |
|
||||||
|
| | 78000 | https://huggingface.co/PrimeIntellect/INTELLECT-1-step-78000 |
|
||||||
|
| | 88000 | https://huggingface.co/PrimeIntellect/INTELLECT-1-step-88000 |
|
||||||
|
|
||||||
|
## **Model Overview**
|
||||||
|
**INTELLECT-1** is the first collaboratively trained 10 billion parameter language model trained from scratch on 1 trillion tokens of English text and code.
|
||||||
|
|
||||||
|
**INTELLECT-1** was trained on up to 14 concurrent nodes distributed across 3 continents, with contributions from 30 independent community contributors providing compute.
|
||||||
|
The training code utilizes the [prime framework](https://github.com/PrimeIntellect-ai/prime), a scalable distributed training framework designed for fault-tolerant, dynamically scaling, high-performance training on unreliable, globally distributed workers.
|
||||||
|
The key abstraction that allows dynamic scaling is the `ElasticDeviceMesh` which manages dynamic global process groups for fault-tolerant communication across the internet and local process groups for communication within a node.
|
||||||
|
The global all-reduce was done with custom int8 all-reduce kernels to reduce the communication payload required, greatly reducing the communication overhead.
|
||||||
|
|
||||||
|
For more detailed technical insights, please refer to our [technical paper](https://github.com/PrimeIntellect-ai/prime).
|
||||||
|
|
||||||
|
## **Model Details**
|
||||||
|
- **Model Contributors**: samsja, Prime Intellect, Arcee AI, kotaro, skre_0, marlo, rodeo, Herb, Olas, superchillen, Hugging Face, mev_pete, 0xfr_, dj, primeprimeint1234, Marco Giglio, realtek, Hyperbolic, hecataeus, NWO, Virtual Machine, droll, SemiAnalysis, _waiting__, toptickcrypto, sto, Johannes, washout_segment_0b, klee
|
||||||
|
- **Release Date**: 29 Nov 2024
|
||||||
|
- **Model License**: Apache 2.0
|
||||||
|
|
||||||
|
## **Technical Specifications**
|
||||||
|
| **Parameter** | **Value** |
|
||||||
|
|----------------------|------------------------|
|
||||||
|
| Parameter Size | 10B |
|
||||||
|
| Number of Layers | 42 |
|
||||||
|
| Number of Attention Heads | 32 |
|
||||||
|
| Hidden Size | 4096 |
|
||||||
|
| Context Length | 8192 |
|
||||||
|
| Vocabulary Size | 128256 |
|
||||||
|
|
||||||
|
## **Citations**
|
||||||
|
If you use this model in your research, please cite it as follows:
|
||||||
|
```
|
||||||
|
@article{}
|
||||||
|
```
|
||||||
34
config.json
Normal file
34
config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"LlamaForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 128000,
|
||||||
|
"eos_token_id": [
|
||||||
|
128001,
|
||||||
|
128008,
|
||||||
|
128009
|
||||||
|
],
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 4096,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 14336,
|
||||||
|
"max_position_embeddings": 8192,
|
||||||
|
"mlp_bias": false,
|
||||||
|
"model_type": "llama",
|
||||||
|
"num_attention_heads": 32,
|
||||||
|
"num_hidden_layers": 42,
|
||||||
|
"num_key_value_heads": 8,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_scaling": {
|
||||||
|
"original_max_position_embeddings": 8192,
|
||||||
|
"rope_type": "default"
|
||||||
|
},
|
||||||
|
"rope_theta": 500000.0,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"transformers_version": "4.44.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 128256
|
||||||
|
}
|
||||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
||||||
8
generation_config.json
Normal file
8
generation_config.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"do_sample": true,
|
||||||
|
"max_length": 100,
|
||||||
|
"temperature": 0.7,
|
||||||
|
"top_k": null,
|
||||||
|
"transformers_version": "4.44.2",
|
||||||
|
"use_cache": false
|
||||||
|
}
|
||||||
3
model-0000-of-0010.safetensors
Normal file
3
model-0000-of-0010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e87a98e0f471bb66e7bbea8ae72eae4847d27b6596cef05131b90939c089c081
|
||||||
|
size 5675028888
|
||||||
3
model-0001-of-0010.safetensors
Normal file
3
model-0001-of-0010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:bcd74c605cf2fd6413421bcc601308b40e5bafa79999175b8652864771c96c03
|
||||||
|
size 3808563592
|
||||||
3
model-0002-of-0010.safetensors
Normal file
3
model-0002-of-0010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:5612d7fa1ad997518a85db217fc1431a79d070a3211721844c298d2bf4a53a0e
|
||||||
|
size 3959574936
|
||||||
3
model-0003-of-0010.safetensors
Normal file
3
model-0003-of-0010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:3ce28d2d9b45a8b726d04ceae7f5cf51791fe4423e27a7dece6e208d09537474
|
||||||
|
size 3573698984
|
||||||
3
model-0004-of-0010.safetensors
Normal file
3
model-0004-of-0010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:98f1797e0c6b7f39ef3418b613761442c02744d21ac76728955e275490f85639
|
||||||
|
size 3808563632
|
||||||
3
model-0005-of-0010.safetensors
Normal file
3
model-0005-of-0010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:137980f73d2391e3d35d78f8064c0defa0d67c9ccdd1f4f3fd60ca21725478f6
|
||||||
|
size 3959574952
|
||||||
3
model-0006-of-0010.safetensors
Normal file
3
model-0006-of-0010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:aa30e9c1fc25d14023afd9334f64ce6ac9aa59aff64e1ba2d41c01ce9c68435f
|
||||||
|
size 3573698984
|
||||||
3
model-0007-of-0010.safetensors
Normal file
3
model-0007-of-0010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:0205aaca2d0cdf7385db8fe9f0c7c8a83b260a74168940be3d867821b04cab4f
|
||||||
|
size 3808563632
|
||||||
3
model-0008-of-0010.safetensors
Normal file
3
model-0008-of-0010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:56901104d4217a1c5c982e7a3145bbe98ff2d9495f5a2264b36a5094b49ac059
|
||||||
|
size 3959574952
|
||||||
3
model-0009-of-0010.safetensors
Normal file
3
model-0009-of-0010.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e37f0a2a5d4a8228b28a7165ff9fc77f619624342be7b8a5b457ef25b9b2ff1b
|
||||||
|
size 4718726536
|
||||||
388
model.safetensors.index.json
Normal file
388
model.safetensors.index.json
Normal file
@@ -0,0 +1,388 @@
|
|||||||
|
{
|
||||||
|
"weight_map": {
|
||||||
|
"model.embed_tokens.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.0.self_attn.q_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.0.self_attn.k_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.0.self_attn.v_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.0.self_attn.o_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.0.mlp.gate_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.0.mlp.down_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.0.mlp.up_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.0.input_layernorm.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.0.post_attention_layernorm.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.1.self_attn.q_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.1.self_attn.k_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.1.self_attn.v_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.1.self_attn.o_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.1.mlp.gate_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.1.mlp.down_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.1.mlp.up_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.1.input_layernorm.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.1.post_attention_layernorm.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.2.self_attn.q_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.2.self_attn.k_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.2.self_attn.v_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.2.self_attn.o_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.2.mlp.gate_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.2.mlp.down_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.2.mlp.up_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.2.input_layernorm.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.2.post_attention_layernorm.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.3.self_attn.q_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.3.self_attn.k_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.3.self_attn.v_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.3.self_attn.o_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.3.mlp.gate_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.3.mlp.down_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.3.mlp.up_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.3.input_layernorm.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.3.post_attention_layernorm.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.4.self_attn.q_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.4.self_attn.k_proj.weight": "model-0000-of-0010.safetensors",
|
||||||
|
"model.layers.4.self_attn.v_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.4.self_attn.o_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.4.mlp.gate_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.4.mlp.down_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.4.mlp.up_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.4.input_layernorm.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.4.post_attention_layernorm.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.5.self_attn.q_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.5.self_attn.k_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.5.self_attn.v_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.5.self_attn.o_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.5.mlp.gate_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.5.mlp.down_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.5.mlp.up_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.5.input_layernorm.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.5.post_attention_layernorm.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.6.self_attn.q_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.6.self_attn.k_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.6.self_attn.v_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.6.self_attn.o_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.6.mlp.gate_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.6.mlp.down_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.6.mlp.up_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.6.input_layernorm.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.6.post_attention_layernorm.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.7.self_attn.q_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.7.self_attn.k_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.7.self_attn.v_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.7.self_attn.o_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.7.mlp.gate_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.7.mlp.down_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.7.mlp.up_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.7.input_layernorm.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.7.post_attention_layernorm.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.8.self_attn.q_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.8.self_attn.k_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.8.self_attn.v_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.8.self_attn.o_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.8.mlp.gate_proj.weight": "model-0001-of-0010.safetensors",
|
||||||
|
"model.layers.8.mlp.down_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.8.mlp.up_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.8.input_layernorm.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.8.post_attention_layernorm.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.9.self_attn.q_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.9.self_attn.k_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.9.self_attn.v_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.9.self_attn.o_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.9.mlp.gate_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.9.mlp.down_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.9.mlp.up_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.9.input_layernorm.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.9.post_attention_layernorm.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.10.self_attn.q_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.10.self_attn.k_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.10.self_attn.v_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.10.self_attn.o_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.10.mlp.gate_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.10.mlp.down_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.10.mlp.up_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.10.input_layernorm.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.10.post_attention_layernorm.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.11.self_attn.q_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.11.self_attn.k_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.11.self_attn.v_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.11.self_attn.o_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.11.mlp.gate_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.11.mlp.down_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.11.mlp.up_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.11.input_layernorm.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.11.post_attention_layernorm.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.12.self_attn.q_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.12.self_attn.k_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.12.self_attn.v_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.12.self_attn.o_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.12.mlp.gate_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.12.mlp.down_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.12.mlp.up_proj.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.12.input_layernorm.weight": "model-0002-of-0010.safetensors",
|
||||||
|
"model.layers.12.post_attention_layernorm.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.13.self_attn.q_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.13.self_attn.k_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.13.self_attn.v_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.13.self_attn.o_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.13.mlp.gate_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.13.mlp.down_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.13.mlp.up_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.13.input_layernorm.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.13.post_attention_layernorm.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.14.self_attn.q_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.14.self_attn.k_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.14.self_attn.v_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.14.self_attn.o_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.14.mlp.gate_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.14.mlp.down_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.14.mlp.up_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.14.input_layernorm.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.14.post_attention_layernorm.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.15.self_attn.q_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.15.self_attn.k_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.15.self_attn.v_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.15.self_attn.o_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.15.mlp.gate_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.15.mlp.down_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.15.mlp.up_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.15.input_layernorm.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.15.post_attention_layernorm.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.16.self_attn.q_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.16.self_attn.k_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.16.self_attn.v_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.16.self_attn.o_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.16.mlp.gate_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.16.mlp.down_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.16.mlp.up_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.16.input_layernorm.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.16.post_attention_layernorm.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.17.self_attn.q_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.17.self_attn.k_proj.weight": "model-0003-of-0010.safetensors",
|
||||||
|
"model.layers.17.self_attn.v_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.17.self_attn.o_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.17.mlp.gate_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.17.mlp.down_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.17.mlp.up_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.17.input_layernorm.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.17.post_attention_layernorm.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.18.self_attn.q_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.18.self_attn.k_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.18.self_attn.v_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.18.self_attn.o_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.18.mlp.gate_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.18.mlp.down_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.18.mlp.up_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.18.input_layernorm.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.18.post_attention_layernorm.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.19.self_attn.q_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.19.self_attn.k_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.19.self_attn.v_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.19.self_attn.o_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.19.mlp.gate_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.19.mlp.down_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.19.mlp.up_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.19.input_layernorm.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.19.post_attention_layernorm.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.20.self_attn.q_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.20.self_attn.k_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.20.self_attn.v_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.20.self_attn.o_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.20.mlp.gate_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.20.mlp.down_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.20.mlp.up_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.20.input_layernorm.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.20.post_attention_layernorm.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.21.self_attn.q_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.21.self_attn.k_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.21.self_attn.v_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.21.self_attn.o_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.21.mlp.gate_proj.weight": "model-0004-of-0010.safetensors",
|
||||||
|
"model.layers.21.mlp.down_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.21.mlp.up_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.21.input_layernorm.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.21.post_attention_layernorm.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.22.self_attn.q_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.22.self_attn.k_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.22.self_attn.v_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.22.self_attn.o_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.22.mlp.gate_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.22.mlp.down_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.22.mlp.up_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.22.input_layernorm.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.22.post_attention_layernorm.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.23.self_attn.q_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.23.self_attn.k_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.23.self_attn.v_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.23.self_attn.o_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.23.mlp.gate_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.23.mlp.down_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.23.mlp.up_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.23.input_layernorm.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.23.post_attention_layernorm.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.24.self_attn.q_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.24.self_attn.k_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.24.self_attn.v_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.24.self_attn.o_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.24.mlp.gate_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.24.mlp.down_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.24.mlp.up_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.24.input_layernorm.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.24.post_attention_layernorm.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.25.self_attn.q_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.25.self_attn.k_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.25.self_attn.v_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.25.self_attn.o_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.25.mlp.gate_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.25.mlp.down_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.25.mlp.up_proj.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.25.input_layernorm.weight": "model-0005-of-0010.safetensors",
|
||||||
|
"model.layers.25.post_attention_layernorm.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.26.self_attn.q_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.26.self_attn.k_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.26.self_attn.v_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.26.self_attn.o_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.26.mlp.gate_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.26.mlp.down_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.26.mlp.up_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.26.input_layernorm.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.26.post_attention_layernorm.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.27.self_attn.q_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.27.self_attn.k_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.27.self_attn.v_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.27.self_attn.o_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.27.mlp.gate_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.27.mlp.down_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.27.mlp.up_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.27.input_layernorm.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.27.post_attention_layernorm.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.28.self_attn.q_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.28.self_attn.k_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.28.self_attn.v_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.28.self_attn.o_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.28.mlp.gate_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.28.mlp.down_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.28.mlp.up_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.28.input_layernorm.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.28.post_attention_layernorm.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.29.self_attn.q_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.29.self_attn.k_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.29.self_attn.v_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.29.self_attn.o_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.29.mlp.gate_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.29.mlp.down_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.29.mlp.up_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.29.input_layernorm.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.29.post_attention_layernorm.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.30.self_attn.q_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.30.self_attn.k_proj.weight": "model-0006-of-0010.safetensors",
|
||||||
|
"model.layers.30.self_attn.v_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.30.self_attn.o_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.30.mlp.gate_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.30.mlp.down_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.30.mlp.up_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.30.input_layernorm.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.30.post_attention_layernorm.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.31.self_attn.q_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.31.self_attn.k_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.31.self_attn.v_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.31.self_attn.o_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.31.mlp.gate_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.31.mlp.down_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.31.mlp.up_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.31.input_layernorm.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.31.post_attention_layernorm.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.32.self_attn.q_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.32.self_attn.k_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.32.self_attn.v_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.32.self_attn.o_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.32.mlp.gate_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.32.mlp.down_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.32.mlp.up_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.32.input_layernorm.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.32.post_attention_layernorm.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.33.self_attn.q_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.33.self_attn.k_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.33.self_attn.v_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.33.self_attn.o_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.33.mlp.gate_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.33.mlp.down_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.33.mlp.up_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.33.input_layernorm.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.33.post_attention_layernorm.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.34.self_attn.q_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.34.self_attn.k_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.34.self_attn.v_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.34.self_attn.o_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.34.mlp.gate_proj.weight": "model-0007-of-0010.safetensors",
|
||||||
|
"model.layers.34.mlp.down_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.34.mlp.up_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.34.input_layernorm.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.34.post_attention_layernorm.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.35.self_attn.q_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.35.self_attn.k_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.35.self_attn.v_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.35.self_attn.o_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.35.mlp.gate_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.35.mlp.down_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.35.mlp.up_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.35.input_layernorm.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.35.post_attention_layernorm.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.36.self_attn.q_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.36.self_attn.k_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.36.self_attn.v_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.36.self_attn.o_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.36.mlp.gate_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.36.mlp.down_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.36.mlp.up_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.36.input_layernorm.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.36.post_attention_layernorm.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.37.self_attn.q_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.37.self_attn.k_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.37.self_attn.v_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.37.self_attn.o_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.37.mlp.gate_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.37.mlp.down_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.37.mlp.up_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.37.input_layernorm.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.37.post_attention_layernorm.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.38.self_attn.q_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.38.self_attn.k_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.38.self_attn.v_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.38.self_attn.o_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.38.mlp.gate_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.38.mlp.down_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.38.mlp.up_proj.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.38.input_layernorm.weight": "model-0008-of-0010.safetensors",
|
||||||
|
"model.layers.38.post_attention_layernorm.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.39.self_attn.q_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.39.self_attn.k_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.39.self_attn.v_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.39.self_attn.o_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.39.mlp.gate_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.39.mlp.down_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.39.mlp.up_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.39.input_layernorm.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.39.post_attention_layernorm.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.40.self_attn.q_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.40.self_attn.k_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.40.self_attn.v_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.40.self_attn.o_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.40.mlp.gate_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.40.mlp.down_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.40.mlp.up_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.40.input_layernorm.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.40.post_attention_layernorm.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.41.self_attn.q_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.41.self_attn.k_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.41.self_attn.v_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.41.self_attn.o_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.41.mlp.gate_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.41.mlp.down_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.41.mlp.up_proj.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.41.input_layernorm.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.layers.41.post_attention_layernorm.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"model.norm.weight": "model-0009-of-0010.safetensors",
|
||||||
|
"lm_head.weight": "model-0009-of-0010.safetensors"
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"total_size": 40845524992
|
||||||
|
}
|
||||||
|
}
|
||||||
16
special_tokens_map.json
Normal file
16
special_tokens_map.json
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
{
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<|begin_of_text|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|end_of_text|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
410563
tokenizer.json
Normal file
410563
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
2061
tokenizer_config.json
Normal file
2061
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user