初始化项目,由ModelHub XC社区提供模型
Model: bigcode/octocoder Source: Original Platform
This commit is contained in:
49
.gitattributes
vendored
Normal file
49
.gitattributes
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer_config.json filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
vocab.json filter=lfs diff=lfs merge=lfs -text
|
||||
config.json filter=lfs diff=lfs merge=lfs -text
|
||||
generation_config.json filter=lfs diff=lfs merge=lfs -text
|
||||
pytorch_model.bin.index.json filter=lfs diff=lfs merge=lfs -text
|
||||
special_tokens_map.json filter=lfs diff=lfs merge=lfs -text
|
||||
pytorch_model-00003-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||
pytorch_model-00004-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||
pytorch_model-00005-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||
pytorch_model-00006-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||
pytorch_model-00007-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||
pytorch_model-00001-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||
pytorch_model-00002-of-00007.bin filter=lfs diff=lfs merge=lfs -text
|
||||
334
README.md
Normal file
334
README.md
Normal file
@@ -0,0 +1,334 @@
|
||||
---
|
||||
pipeline_tag: text-generation
|
||||
inference: true
|
||||
widget:
|
||||
- text: 'Question: Please write a function in Python that performs bubble sort.\n\nAnswer:'
|
||||
example_title: Bubble sort
|
||||
group: Python
|
||||
license: bigcode-openrail-m
|
||||
datasets:
|
||||
- bigcode/commitpackft
|
||||
- bigcode/oasst-octopack
|
||||
metrics:
|
||||
- code_eval
|
||||
library_name: transformers
|
||||
tags:
|
||||
- code
|
||||
model-index:
|
||||
- name: OctoCoder
|
||||
results:
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalSynthesize Python
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 46.2
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalSynthesize JavaScript
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 39.2
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalSynthesize Java
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 38.2
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalSynthesize Go
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 30.4
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalSynthesize C++
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 35.6
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalSynthesize Rust
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 23.4
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalSynthesize Average
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 35.5
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalFix Python
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 30.4
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalFix JavaScript
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 28.4
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalFix Java
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 30.6
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalFix Go
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 30.2
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalFix C++
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 26.1
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalFix Rust
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 16.5
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalFix Average
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 27.0
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalExplain Python
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 35.1
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalExplain JavaScript
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 24.5
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalExplain Java
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 27.3
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalExplain Go
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 21.1
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalExplain C++
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 24.1
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalExplain Rust
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 14.8
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: bigcode/humanevalpack
|
||||
name: HumanEvalExplain Average
|
||||
metrics:
|
||||
- name: pass@1
|
||||
type: pass@1
|
||||
value: 24.5
|
||||
verified: false
|
||||
---
|
||||
|
||||

|
||||
|
||||
# Table of Contents
|
||||
|
||||
1. [Model Summary](#model-summary)
|
||||
2. [Use](#use)
|
||||
3. [Training](#training)
|
||||
4. [Citation](#citation)
|
||||
|
||||
# Model Summary
|
||||
|
||||
> OctoCoder is an instruction-tuned model with 15.5B parameters, created by finetuning StarCoder on CommitPackFT & OASST as described in the OctoPack paper.
|
||||
|
||||
- **Repository:** [bigcode-project/octopack](https://github.com/bigcode-project/octopack)
|
||||
- **Paper:** [OctoPack: Instruction Tuning Code Large Language Models](https://arxiv.org/abs/2308.07124)
|
||||
- **Languages:** 80+ Programming languages
|
||||
- **OctoPack🐙🎒:**
|
||||
<table>
|
||||
<tr>
|
||||
<th>Data</th>
|
||||
<th><a href=https://huggingface.co/datasets/bigcode/commitpack>CommitPack</a></th>
|
||||
<td>4TB of GitHub commits across 350 programming languages</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th></th>
|
||||
<th><a href=https://huggingface.co/datasets/bigcode/commitpackft>CommitPackFT</a></th>
|
||||
<td>Filtered version of CommitPack for high-quality commit messages that resemble instructions</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Model</th>
|
||||
<th><a href=https://huggingface.co/bigcode/octocoder>OctoCoder</a></th>
|
||||
<td>StarCoder (16B parameters) instruction tuned on CommitPackFT + OASST</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th></th>
|
||||
<th><a href=https://huggingface.co/bigcode/octogeex>OctoGeeX</a></th>
|
||||
<td>CodeGeeX2 (6B parameters) instruction tuned on CommitPackFT + OASST</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Evaluation</th>
|
||||
<th><a href=https://huggingface.co/datasets/bigcode/humanevalpack>HumanEvalPack</a></th>
|
||||
<td>Extension of OpenAI's HumanEval to cover 3 scenarios across 6 languages</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
||||
# Use
|
||||
|
||||
## Intended use
|
||||
|
||||
The model follows instructions provided in the input. You should always preface your input with "Question: " and finish it with "Answer:", for example: "Question: Please write a function in Python that performs bubble sort.\n\nAnswer:"
|
||||
|
||||
**Feel free to share your generations in the Community tab!**
|
||||
|
||||
## Generation
|
||||
```python
|
||||
# pip install -q transformers
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
checkpoint = "bigcode/octocoder"
|
||||
device = "cuda" # for GPU usage or "cpu" for CPU usage
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
||||
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
|
||||
|
||||
inputs = tokenizer.encode("Question: Please write a function in Python that performs bubble sort.\n\nAnswer:", return_tensors="pt").to(device)
|
||||
outputs = model.generate(inputs)
|
||||
print(tokenizer.decode(outputs[0]))
|
||||
```
|
||||
|
||||
# Training
|
||||
|
||||
## Model
|
||||
|
||||
- **Architecture:** GPT-2 model with multi-query attention and Fill-in-the-Middle objective
|
||||
- **Steps:** 250k pretraining & 30 instruction tuning
|
||||
- **Tokens:** 1 trillion pretraining & 2M instruction tuning
|
||||
- **Precision:** bfloat16
|
||||
|
||||
## Hardware
|
||||
|
||||
- **Pretraining:**
|
||||
- **GPUs:** 512 Tesla A100
|
||||
- **Training time:** 24 days
|
||||
- **Instruction tuning:**
|
||||
- **GPUs:** 8 Tesla A100
|
||||
- **Training time:** 4 hours
|
||||
|
||||
## Software
|
||||
|
||||
- **Orchestration:** [Megatron-LM/Transformers](https://github.com/bigcode-project/octopack#training)
|
||||
- **Neural networks:** [PyTorch](https://github.com/pytorch/pytorch)
|
||||
|
||||
# Citation
|
||||
|
||||
```bibtex
|
||||
@article{muennighoff2023octopack,
|
||||
title={OctoPack: Instruction Tuning Code Large Language Models},
|
||||
author={Niklas Muennighoff and Qian Liu and Armel Zebaze and Qinkai Zheng and Binyuan Hui and Terry Yue Zhuo and Swayam Singh and Xiangru Tang and Leandro von Werra and Shayne Longpre},
|
||||
journal={arXiv preprint arXiv:2308.07124},
|
||||
year={2023}
|
||||
}
|
||||
```
|
||||
3
config.json
Normal file
3
config.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0dc5b8bf6be78dfacc1a5ddb7e2224b69dfbe0a30e632db4891784ce3340f9bb
|
||||
size 1008
|
||||
3
generation_config.json
Normal file
3
generation_config.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:634b0b7323db9a5f1421a068af9f79c9a2b403496a74cd2ce44e6207af41d912
|
||||
size 116
|
||||
48892
merges.txt
Normal file
48892
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00007.safetensors
Normal file
3
model-00001-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:09ac7601c3d2f981714b44d2b52c9caebd0c77b934e56203d6021d91e00bf41c
|
||||
size 9904362872
|
||||
3
model-00002-of-00007.safetensors
Normal file
3
model-00002-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e983ec634521f4e32fb06de0a37de5a12adf1195f1d56ef662647a20179c2dd8
|
||||
size 9860447256
|
||||
3
model-00003-of-00007.safetensors
Normal file
3
model-00003-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:dc9b7beaba475db0578e79ecc545a2f6c7647c05deab03bed3b92da52b930341
|
||||
size 9854228560
|
||||
3
model-00004-of-00007.safetensors
Normal file
3
model-00004-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1c3207001107e933840897b7b4f54f89c666115efa47c15e5624be58a8bae189
|
||||
size 9860447304
|
||||
3
model-00005-of-00007.safetensors
Normal file
3
model-00005-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:71f732da4a08546b712eed97021bf29aa7d57f40f817528eab4a46214c6b15e9
|
||||
size 9854228560
|
||||
3
model-00006-of-00007.safetensors
Normal file
3
model-00006-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e3834928c2ea4919d6fe81e379ff16343f802dd57d9a00d1828c92df89a706ec
|
||||
size 9860447304
|
||||
3
model-00007-of-00007.safetensors
Normal file
3
model-00007-of-00007.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b053bc62199e0d4636f6819412fb45065311f71a886194a14309ebbb1608c69b
|
||||
size 2875714840
|
||||
492
model.safetensors.index.json
Normal file
492
model.safetensors.index.json
Normal file
@@ -0,0 +1,492 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 62069825536
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.0.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.1.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.10.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.10.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.11.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.12.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.12.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.12.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.12.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.12.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.12.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.12.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.12.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.12.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.12.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.12.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.12.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.13.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.14.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.15.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.16.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.17.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.attn.c_attn.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.attn.c_attn.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.attn.c_proj.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.attn.c_proj.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.ln_1.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.ln_1.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.ln_2.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.ln_2.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
|
||||
"transformer.h.18.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.18.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.19.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.2.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.2.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.20.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.20.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.21.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.22.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.23.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.attn.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.attn.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.ln_2.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.ln_2.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.24.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.25.attn.c_attn.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.25.attn.c_attn.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.25.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.25.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.25.ln_1.bias": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.25.ln_1.weight": "model-00004-of-00007.safetensors",
|
||||
"transformer.h.25.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.25.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.25.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.25.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.25.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.25.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.26.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.27.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.28.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.29.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.3.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.3.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.30.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.30.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.attn.c_attn.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.attn.c_attn.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.attn.c_proj.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.attn.c_proj.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.ln_1.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.ln_1.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.ln_2.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.ln_2.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
|
||||
"transformer.h.31.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.31.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.32.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.33.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.34.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.35.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.36.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.attn.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.attn.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.ln_2.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.ln_2.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.37.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.38.attn.c_attn.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.38.attn.c_attn.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.38.attn.c_proj.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.38.attn.c_proj.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.38.ln_1.bias": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.38.ln_1.weight": "model-00006-of-00007.safetensors",
|
||||
"transformer.h.38.ln_2.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.38.ln_2.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.38.mlp.c_fc.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.38.mlp.c_fc.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.38.mlp.c_proj.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.38.mlp.c_proj.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.attn.c_attn.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.attn.c_attn.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.attn.c_proj.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.attn.c_proj.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.ln_1.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.ln_1.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.ln_2.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.ln_2.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.mlp.c_fc.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.mlp.c_fc.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.mlp.c_proj.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.39.mlp.c_proj.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.h.4.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.4.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.attn.c_attn.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.attn.c_attn.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.attn.c_proj.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.attn.c_proj.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.ln_1.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.ln_1.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.ln_2.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.ln_2.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.h.5.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.5.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.6.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.7.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.8.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.attn.c_attn.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.attn.c_attn.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.attn.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.attn.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.ln_1.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.ln_1.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.ln_2.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.ln_2.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
|
||||
"transformer.h.9.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
|
||||
"transformer.ln_f.bias": "model-00007-of-00007.safetensors",
|
||||
"transformer.ln_f.weight": "model-00007-of-00007.safetensors",
|
||||
"transformer.wpe.weight": "model-00001-of-00007.safetensors",
|
||||
"transformer.wte.weight": "model-00001-of-00007.safetensors"
|
||||
}
|
||||
}
|
||||
3
pytorch_model-00001-of-00007.bin
Normal file
3
pytorch_model-00001-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3e3be7ef755097a936f6134cd543ccb3ac80f641c882421dccb7906519f524b1
|
||||
size 9904379303
|
||||
3
pytorch_model-00002-of-00007.bin
Normal file
3
pytorch_model-00002-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b7b8a5ee8e5c9141b9aff700b8c9ae1ce7ac8fc0169faa39bb073493d673fdfa
|
||||
size 9860464915
|
||||
3
pytorch_model-00003-of-00007.bin
Normal file
3
pytorch_model-00003-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f4c1c61abafb7d59a3a0d44bd8c8a8901cbfc67f40b61c5da1b08e5823e66326
|
||||
size 9854246167
|
||||
3
pytorch_model-00004-of-00007.bin
Normal file
3
pytorch_model-00004-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7cc8d33c7b57f58ad31d1a0aef3f8e7f00fd3259a3278654315f4a26e45ad59d
|
||||
size 9860464979
|
||||
3
pytorch_model-00005-of-00007.bin
Normal file
3
pytorch_model-00005-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d2b2177e79d0efd24ceca9678b5aa47fef3bbc262f23fae42834e65510313d72
|
||||
size 9854246167
|
||||
3
pytorch_model-00006-of-00007.bin
Normal file
3
pytorch_model-00006-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e054051316d386683f3577f50551bdb3a6af3e13d3f2657fb87d35ec9ba478ba
|
||||
size 9860464979
|
||||
3
pytorch_model-00007-of-00007.bin
Normal file
3
pytorch_model-00007-of-00007.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e6eb36a714f29b6f8004f17ac3c96407595cd5d2f580c05a23f43b095afdf26e
|
||||
size 2875719771
|
||||
3
pytorch_model.bin.index.json
Normal file
3
pytorch_model.bin.index.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b217742d53295e992d923051a73d629425169b9088af65b76724d3b5195a3ac9
|
||||
size 36278
|
||||
3
special_tokens_map.json
Normal file
3
special_tokens_map.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0823292e24ea07b89317e9ede9d08da2a1b6c014290c06908a7ad04f1efd6719
|
||||
size 532
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9af07a3123a1f4d75dcb85fbdc4c62f9b7873d23fa39c449d2240c3e33eb3ab5
|
||||
size 2057423
|
||||
3
tokenizer_config.json
Normal file
3
tokenizer_config.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4d8a576be1b7a37446e07a524202302c08ddc116e68b2e042d9fe4eaef46192e
|
||||
size 717
|
||||
3
vocab.json
Normal file
3
vocab.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:20175afb9f164fad4829aca2279f8df7eeff1e2e3f671378aaa287a740aff09f
|
||||
size 776993
|
||||
Reference in New Issue
Block a user