初始化项目,由ModelHub XC社区提供模型
Model: bigcode/octocoder Source: Original Platform
This commit is contained in:
49
.gitattributes
vendored
Normal file
49
.gitattributes
vendored
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
tokenizer_config.json filter=lfs diff=lfs merge=lfs -text
|
||||||
|
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||||
|
vocab.json filter=lfs diff=lfs merge=lfs -text
|
||||||
|
config.json filter=lfs diff=lfs merge=lfs -text
|
||||||
|
generation_config.json filter=lfs diff=lfs merge=lfs -text
|
||||||
|
pytorch_model.bin.index.json filter=lfs diff=lfs merge=lfs -text
|
||||||
|
special_tokens_map.json filter=lfs diff=lfs merge=lfs -text
|
||||||
|
pytorch_model-00003-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
pytorch_model-00004-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
pytorch_model-00005-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
pytorch_model-00006-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
pytorch_model-00007-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
pytorch_model-00001-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
pytorch_model-00002-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
334
README.md
Normal file
334
README.md
Normal file
@@ -0,0 +1,334 @@
|
|||||||
|
---
|
||||||
|
pipeline_tag: text-generation
|
||||||
|
inference: true
|
||||||
|
widget:
|
||||||
|
- text: "Question: Please write a function in Python that performs bubble sort.\n\nAnswer:"
|
||||||
|
example_title: Bubble sort
|
||||||
|
group: Python
|
||||||
|
license: bigcode-openrail-m
|
||||||
|
datasets:
|
||||||
|
- bigcode/commitpackft
|
||||||
|
- bigcode/oasst-octopack
|
||||||
|
metrics:
|
||||||
|
- code_eval
|
||||||
|
library_name: transformers
|
||||||
|
tags:
|
||||||
|
- code
|
||||||
|
model-index:
|
||||||
|
- name: OctoCoder
|
||||||
|
results:
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalSynthesize Python
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 46.2
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalSynthesize JavaScript
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 39.2
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalSynthesize Java
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 38.2
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalSynthesize Go
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 30.4
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalSynthesize C++
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 35.6
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalSynthesize Rust
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 23.4
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalSynthesize Average
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 35.5
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalFix Python
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 30.4
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalFix JavaScript
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 28.4
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalFix Java
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 30.6
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalFix Go
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 30.2
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalFix C++
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 26.1
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalFix Rust
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 16.5
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalFix Average
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 27.0
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalExplain Python
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 35.1
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalExplain JavaScript
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 24.5
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalExplain Java
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 27.3
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalExplain Go
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 21.1
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalExplain C++
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 24.1
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalExplain Rust
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 14.8
|
||||||
|
verified: false
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
dataset:
|
||||||
|
type: bigcode/humanevalpack
|
||||||
|
name: HumanEvalExplain Average
|
||||||
|
metrics:
|
||||||
|
- name: pass@1
|
||||||
|
type: pass@1
|
||||||
|
value: 24.5
|
||||||
|
verified: false
|
||||||
|
---
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
# Table of Contents
|
||||||
|
|
||||||
|
1. [Model Summary](#model-summary)
|
||||||
|
2. [Use](#use)
|
||||||
|
3. [Training](#training)
|
||||||
|
4. [Citation](#citation)
|
||||||
|
|
||||||
|
# Model Summary
|
||||||
|
|
||||||
|
> OctoCoder is an instruction-tuned model with 15.5B parameters created by finetuning StarCoder on CommitPackFT & OASST, as described in the OctoPack paper.
|
||||||
|
|
||||||
|
- **Repository:** [bigcode-project/octopack](https://github.com/bigcode-project/octopack)
|
||||||
|
- **Paper:** [OctoPack: Instruction Tuning Code Large Language Models](https://arxiv.org/abs/2308.07124)
|
||||||
|
- **Languages:** 80+ Programming languages
|
||||||
|
- **OctoPack🐙🎒:**
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Data</th>
|
||||||
|
<th><a href=https://huggingface.co/datasets/bigcode/commitpack>CommitPack</a></th>
|
||||||
|
<td>4TB of GitHub commits across 350 programming languages</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th></th>
|
||||||
|
<th><a href=https://huggingface.co/datasets/bigcode/commitpackft>CommitPackFT</a></th>
|
||||||
|
<td>Filtered version of CommitPack for high-quality commit messages that resemble instructions</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>Model</th>
|
||||||
|
<th><a href=https://huggingface.co/bigcode/octocoder>OctoCoder</a></th>
|
||||||
|
<td>StarCoder (16B parameters) instruction tuned on CommitPackFT + OASST</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th></th>
|
||||||
|
<th><a href=https://huggingface.co/bigcode/octogeex>OctoGeeX</a></th>
|
||||||
|
<td>CodeGeeX2 (6B parameters) instruction tuned on CommitPackFT + OASST</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<th>Evaluation</th>
|
||||||
|
<th><a href=https://huggingface.co/datasets/bigcode/humanevalpack>HumanEvalPack</a></th>
|
||||||
|
<td>Extension of OpenAI's HumanEval to cover 3 scenarios across 6 languages</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
|
||||||
|
# Use
|
||||||
|
|
||||||
|
## Intended use
|
||||||
|
|
||||||
|
The model follows instructions provided in the input. You should always preface your input with "Question: " and finish it with "Answer:", for example: "Question: Please write a function in Python that performs bubble sort.\n\nAnswer:"
|
||||||
|
|
||||||
|
**Feel free to share your generations in the Community tab!**
|
||||||
|
|
||||||
|
## Generation
|
||||||
|
```python
|
||||||
|
# pip install -q transformers
|
||||||
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||||
|
|
||||||
|
checkpoint = "bigcode/octocoder"
|
||||||
|
device = "cuda" # for GPU usage or "cpu" for CPU usage
|
||||||
|
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
|
||||||
|
|
||||||
|
inputs = tokenizer.encode("Question: Please write a function in Python that performs bubble sort.\n\nAnswer:", return_tensors="pt").to(device)
|
||||||
|
outputs = model.generate(inputs)
|
||||||
|
print(tokenizer.decode(outputs[0]))
|
||||||
|
```
|
||||||
|
|
||||||
|
# Training
|
||||||
|
|
||||||
|
## Model
|
||||||
|
|
||||||
|
- **Architecture:** GPT-2 model with multi-query attention and Fill-in-the-Middle objective
|
||||||
|
- **Steps:** 250k pretraining & 30 instruction tuning
|
||||||
|
- **Tokens:** 1 trillion pretraining & 2M instruction tuning
|
||||||
|
- **Precision:** bfloat16
|
||||||
|
|
||||||
|
## Hardware
|
||||||
|
|
||||||
|
- **Pretraining:**
|
||||||
|
- **GPUs:** 512 Tesla A100
|
||||||
|
- **Training time:** 24 days
|
||||||
|
- **Instruction tuning:**
|
||||||
|
- **GPUs:** 8 Tesla A100
|
||||||
|
- **Training time:** 4 hours
|
||||||
|
|
||||||
|
## Software
|
||||||
|
|
||||||
|
- **Orchestration:** [Megatron-LM/Transformers](https://github.com/bigcode-project/octopack#training)
|
||||||
|
- **Neural networks:** [PyTorch](https://github.com/pytorch/pytorch)
|
||||||
|
|
||||||
|
# Citation
|
||||||
|
|
||||||
|
```bibtex
|
||||||
|
@article{muennighoff2023octopack,
|
||||||
|
title={OctoPack: Instruction Tuning Code Large Language Models},
|
||||||
|
author={Niklas Muennighoff and Qian Liu and Armel Zebaze and Qinkai Zheng and Binyuan Hui and Terry Yue Zhuo and Swayam Singh and Xiangru Tang and Leandro von Werra and Shayne Longpre},
|
||||||
|
journal={arXiv preprint arXiv:2308.07124},
|
||||||
|
year={2023}
|
||||||
|
}
|
||||||
|
```
|
||||||
3
config.json
Normal file
3
config.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:0dc5b8bf6be78dfacc1a5ddb7e2224b69dfbe0a30e632db4891784ce3340f9bb
|
||||||
|
size 1008
|
||||||
3
generation_config.json
Normal file
3
generation_config.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:634b0b7323db9a5f1421a068af9f79c9a2b403496a74cd2ce44e6207af41d912
|
||||||
|
size 116
|
||||||
48892
merges.txt
Normal file
48892
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00007.safetensors
Normal file
3
model-00001-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:09ac7601c3d2f981714b44d2b52c9caebd0c77b934e56203d6021d91e00bf41c
|
||||||
|
size 9904362872
|
||||||
3
model-00002-of-00007.safetensors
Normal file
3
model-00002-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e983ec634521f4e32fb06de0a37de5a12adf1195f1d56ef662647a20179c2dd8
|
||||||
|
size 9860447256
|
||||||
3
model-00003-of-00007.safetensors
Normal file
3
model-00003-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:dc9b7beaba475db0578e79ecc545a2f6c7647c05deab03bed3b92da52b930341
|
||||||
|
size 9854228560
|
||||||
3
model-00004-of-00007.safetensors
Normal file
3
model-00004-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:1c3207001107e933840897b7b4f54f89c666115efa47c15e5624be58a8bae189
|
||||||
|
size 9860447304
|
||||||
3
model-00005-of-00007.safetensors
Normal file
3
model-00005-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:71f732da4a08546b712eed97021bf29aa7d57f40f817528eab4a46214c6b15e9
|
||||||
|
size 9854228560
|
||||||
3
model-00006-of-00007.safetensors
Normal file
3
model-00006-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e3834928c2ea4919d6fe81e379ff16343f802dd57d9a00d1828c92df89a706ec
|
||||||
|
size 9860447304
|
||||||
3
model-00007-of-00007.safetensors
Normal file
3
model-00007-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:b053bc62199e0d4636f6819412fb45065311f71a886194a14309ebbb1608c69b
|
||||||
|
size 2875714840
|
||||||
492
model.safetensors.index.json
Normal file
492
model.safetensors.index.json
Normal file
@@ -0,0 +1,492 @@
|
|||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"total_size": 62069825536
|
||||||
|
},
|
||||||
|
"weight_map": {
|
||||||
|
"lm_head.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.0.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.1.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.10.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.10.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.11.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.12.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.12.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.12.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.12.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.12.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.12.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.12.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.12.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.12.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.12.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.12.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.12.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.13.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.14.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.15.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.16.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.17.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||||
|
"transformer.h.18.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.18.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.19.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.2.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.2.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.20.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.20.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.21.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.22.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.23.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.24.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.25.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.25.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.25.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.25.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.25.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.25.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||||
|
"transformer.h.25.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.25.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.25.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.25.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.25.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.25.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.26.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.27.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.28.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.29.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.3.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.3.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.30.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.30.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||||
|
"transformer.h.31.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.31.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.32.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.33.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.34.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.35.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.36.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.37.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.38.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.38.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.38.attn.c_proj.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.38.attn.c_proj.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.38.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.38.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||||
|
"transformer.h.38.ln_2.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.38.ln_2.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.38.mlp.c_fc.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.38.mlp.c_fc.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.38.mlp.c_proj.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.38.mlp.c_proj.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.attn.c_attn.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.attn.c_attn.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.attn.c_proj.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.attn.c_proj.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.ln_1.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.ln_1.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.ln_2.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.ln_2.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.mlp.c_fc.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.mlp.c_fc.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.mlp.c_proj.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.39.mlp.c_proj.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.h.4.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.4.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.h.5.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.5.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.6.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.7.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.8.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.h.9.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||||
|
"transformer.ln_f.bias": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.ln_f.weight": "model-00007-of-00007.safetensors",
|
||||||
|
"transformer.wpe.weight": "model-00001-of-00007.safetensors",
|
||||||
|
"transformer.wte.weight": "model-00001-of-00007.safetensors"
|
||||||
|
}
|
||||||
|
}
|
||||||
3
pytorch_model-00001-of-00007.bin
Normal file
3
pytorch_model-00001-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:3e3be7ef755097a936f6134cd543ccb3ac80f641c882421dccb7906519f524b1
|
||||||
|
size 9904379303
|
||||||
3
pytorch_model-00002-of-00007.bin
Normal file
3
pytorch_model-00002-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:b7b8a5ee8e5c9141b9aff700b8c9ae1ce7ac8fc0169faa39bb073493d673fdfa
|
||||||
|
size 9860464915
|
||||||
3
pytorch_model-00003-of-00007.bin
Normal file
3
pytorch_model-00003-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f4c1c61abafb7d59a3a0d44bd8c8a8901cbfc67f40b61c5da1b08e5823e66326
|
||||||
|
size 9854246167
|
||||||
3
pytorch_model-00004-of-00007.bin
Normal file
3
pytorch_model-00004-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:7cc8d33c7b57f58ad31d1a0aef3f8e7f00fd3259a3278654315f4a26e45ad59d
|
||||||
|
size 9860464979
|
||||||
3
pytorch_model-00005-of-00007.bin
Normal file
3
pytorch_model-00005-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d2b2177e79d0efd24ceca9678b5aa47fef3bbc262f23fae42834e65510313d72
|
||||||
|
size 9854246167
|
||||||
3
pytorch_model-00006-of-00007.bin
Normal file
3
pytorch_model-00006-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e054051316d386683f3577f50551bdb3a6af3e13d3f2657fb87d35ec9ba478ba
|
||||||
|
size 9860464979
|
||||||
3
pytorch_model-00007-of-00007.bin
Normal file
3
pytorch_model-00007-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e6eb36a714f29b6f8004f17ac3c96407595cd5d2f580c05a23f43b095afdf26e
|
||||||
|
size 2875719771
|
||||||
3
pytorch_model.bin.index.json
Normal file
3
pytorch_model.bin.index.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:b217742d53295e992d923051a73d629425169b9088af65b76724d3b5195a3ac9
|
||||||
|
size 36278
|
||||||
3
special_tokens_map.json
Normal file
3
special_tokens_map.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:0823292e24ea07b89317e9ede9d08da2a1b6c014290c06908a7ad04f1efd6719
|
||||||
|
size 532
|
||||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:9af07a3123a1f4d75dcb85fbdc4c62f9b7873d23fa39c449d2240c3e33eb3ab5
|
||||||
|
size 2057423
|
||||||
3
tokenizer_config.json
Normal file
3
tokenizer_config.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:4d8a576be1b7a37446e07a524202302c08ddc116e68b2e042d9fe4eaef46192e
|
||||||
|
size 717
|
||||||
3
vocab.json
Normal file
3
vocab.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:20175afb9f164fad4829aca2279f8df7eeff1e2e3f671378aaa287a740aff09f
|
||||||
|
size 776993
|
||||||
Reference in New Issue
Block a user