初始化项目,由ModelHub XC社区提供模型

Model: bigcode/octocoder
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-10 08:17:22 +08:00
commit f3a7d2dfc3
25 changed files with 49830 additions and 0 deletions

49
.gitattributes vendored Normal file
View File

@@ -0,0 +1,49 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer_config.json filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text
vocab.json filter=lfs diff=lfs merge=lfs -text
config.json filter=lfs diff=lfs merge=lfs -text
generation_config.json filter=lfs diff=lfs merge=lfs -text
pytorch_model.bin.index.json filter=lfs diff=lfs merge=lfs -text
special_tokens_map.json filter=lfs diff=lfs merge=lfs -text
pytorch_model-00003-of-00007.bin filter=lfs diff=lfs merge=lfs -text
pytorch_model-00004-of-00007.bin filter=lfs diff=lfs merge=lfs -text
pytorch_model-00005-of-00007.bin filter=lfs diff=lfs merge=lfs -text
pytorch_model-00006-of-00007.bin filter=lfs diff=lfs merge=lfs -text
pytorch_model-00007-of-00007.bin filter=lfs diff=lfs merge=lfs -text
pytorch_model-00001-of-00007.bin filter=lfs diff=lfs merge=lfs -text
pytorch_model-00002-of-00007.bin filter=lfs diff=lfs merge=lfs -text

334
README.md Normal file
View File

@@ -0,0 +1,334 @@
---
pipeline_tag: text-generation
inference: true
widget:
- text: "Question: Please write a function in Python that performs bubble sort.\n\nAnswer:"
example_title: Bubble sort
group: Python
license: bigcode-openrail-m
datasets:
- bigcode/commitpackft
- bigcode/oasst-octopack
metrics:
- code_eval
library_name: transformers
tags:
- code
model-index:
- name: OctoCoder
results:
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalSynthesize Python
metrics:
- name: pass@1
type: pass@1
value: 46.2
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalSynthesize JavaScript
metrics:
- name: pass@1
type: pass@1
value: 39.2
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalSynthesize Java
metrics:
- name: pass@1
type: pass@1
value: 38.2
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalSynthesize Go
metrics:
- name: pass@1
type: pass@1
value: 30.4
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalSynthesize C++
metrics:
- name: pass@1
type: pass@1
value: 35.6
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalSynthesize Rust
metrics:
- name: pass@1
type: pass@1
value: 23.4
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalSynthesize Average
metrics:
- name: pass@1
type: pass@1
value: 35.5
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalFix Python
metrics:
- name: pass@1
type: pass@1
value: 30.4
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalFix JavaScript
metrics:
- name: pass@1
type: pass@1
value: 28.4
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalFix Java
metrics:
- name: pass@1
type: pass@1
value: 30.6
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalFix Go
metrics:
- name: pass@1
type: pass@1
value: 30.2
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalFix C++
metrics:
- name: pass@1
type: pass@1
value: 26.1
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalFix Rust
metrics:
- name: pass@1
type: pass@1
value: 16.5
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalFix Average
metrics:
- name: pass@1
type: pass@1
value: 27.0
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalExplain Python
metrics:
- name: pass@1
type: pass@1
value: 35.1
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalExplain JavaScript
metrics:
- name: pass@1
type: pass@1
value: 24.5
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalExplain Java
metrics:
- name: pass@1
type: pass@1
value: 27.3
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalExplain Go
metrics:
- name: pass@1
type: pass@1
value: 21.1
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalExplain C++
metrics:
- name: pass@1
type: pass@1
value: 24.1
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalExplain Rust
metrics:
- name: pass@1
type: pass@1
value: 14.8
verified: false
- task:
type: text-generation
dataset:
type: bigcode/humanevalpack
name: HumanEvalExplain Average
metrics:
- name: pass@1
type: pass@1
value: 24.5
verified: false
---
![Octopack](https://github.com/bigcode-project/octopack/blob/31f3320f098703c7910e43492c39366eeea68d83/banner.png?raw=true)
# Table of Contents
1. [Model Summary](#model-summary)
2. [Use](#use)
3. [Training](#training)
4. [Citation](#citation)
# Model Summary
> OctoCoder is an instruction-tuned model with 15.5B parameters, created by fine-tuning StarCoder on CommitPackFT & OASST as described in the OctoPack paper.
- **Repository:** [bigcode-project/octopack](https://github.com/bigcode-project/octopack)
- **Paper:** [OctoPack: Instruction Tuning Code Large Language Models](https://arxiv.org/abs/2308.07124)
- **Languages:** 80+ Programming languages
- **OctoPack🐙🎒:**
<table>
<tr>
<th>Data</th>
<th><a href=https://huggingface.co/datasets/bigcode/commitpack>CommitPack</a></th>
<td>4TB of GitHub commits across 350 programming languages</td>
</tr>
<tr>
<th></th>
<th><a href=https://huggingface.co/datasets/bigcode/commitpackft>CommitPackFT</a></th>
<td>Filtered version of CommitPack for high-quality commit messages that resemble instructions</td>
</tr>
<tr>
<th>Model</th>
<th><a href=https://huggingface.co/bigcode/octocoder>OctoCoder</a></th>
<td>StarCoder (16B parameters) instruction tuned on CommitPackFT + OASST</td>
</tr>
<tr>
<th></th>
<th><a href=https://huggingface.co/bigcode/octogeex>OctoGeeX</a></th>
<td>CodeGeeX2 (6B parameters) instruction tuned on CommitPackFT + OASST</td>
</tr>
<tr>
<th>Evaluation&nbsp;&nbsp;</th>
<th><a href=https://huggingface.co/datasets/bigcode/humanevalpack>HumanEvalPack</a></th>
<td>Extension of OpenAI's HumanEval to cover 3 scenarios across 6 languages</td>
</tr>
</table>
# Use
## Intended use
The model follows instructions provided in the input. You should always preface your input with "Question: " and finish it with "Answer:", for example: "Question: Please write a function in Python that performs bubble sort.\n\nAnswer:"
**Feel free to share your generations in the Community tab!**
## Generation
```python
# pip install -q transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "bigcode/octocoder"
device = "cuda" # for GPU usage or "cpu" for CPU usage
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
inputs = tokenizer.encode("Question: Please write a function in Python that performs bubble sort.\n\nAnswer:", return_tensors="pt").to(device)
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
```
# Training
## Model
- **Architecture:** GPT-2 model with multi-query attention and Fill-in-the-Middle objective
- **Steps:** 250k pretraining & 30 instruction tuning
- **Tokens:** 1 trillion pretraining & 2M instruction tuning
- **Precision:** bfloat16
## Hardware
- **Pretraining:**
- **GPUs:** 512 Tesla A100
- **Training time:** 24 days
- **Instruction tuning:**
- **GPUs:** 8 Tesla A100
- **Training time:** 4 hours
## Software
- **Orchestration:** [Megatron-LM/Transformers](https://github.com/bigcode-project/octopack#training)
- **Neural networks:** [PyTorch](https://github.com/pytorch/pytorch)
# Citation
```bibtex
@article{muennighoff2023octopack,
title={OctoPack: Instruction Tuning Code Large Language Models},
author={Niklas Muennighoff and Qian Liu and Armel Zebaze and Qinkai Zheng and Binyuan Hui and Terry Yue Zhuo and Swayam Singh and Xiangru Tang and Leandro von Werra and Shayne Longpre},
journal={arXiv preprint arXiv:2308.07124},
year={2023}
}
```

3
config.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0dc5b8bf6be78dfacc1a5ddb7e2224b69dfbe0a30e632db4891784ce3340f9bb
size 1008

3
generation_config.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:634b0b7323db9a5f1421a068af9f79c9a2b403496a74cd2ce44e6207af41d912
size 116

48892
merges.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:09ac7601c3d2f981714b44d2b52c9caebd0c77b934e56203d6021d91e00bf41c
size 9904362872

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e983ec634521f4e32fb06de0a37de5a12adf1195f1d56ef662647a20179c2dd8
size 9860447256

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dc9b7beaba475db0578e79ecc545a2f6c7647c05deab03bed3b92da52b930341
size 9854228560

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1c3207001107e933840897b7b4f54f89c666115efa47c15e5624be58a8bae189
size 9860447304

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:71f732da4a08546b712eed97021bf29aa7d57f40f817528eab4a46214c6b15e9
size 9854228560

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e3834928c2ea4919d6fe81e379ff16343f802dd57d9a00d1828c92df89a706ec
size 9860447304

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b053bc62199e0d4636f6819412fb45065311f71a886194a14309ebbb1608c69b
size 2875714840

View File

@@ -0,0 +1,492 @@
{
"metadata": {
"total_size": 62069825536
},
"weight_map": {
"lm_head.weight": "model-00001-of-00007.safetensors",
"transformer.h.0.attn.c_attn.bias": "model-00001-of-00007.safetensors",
"transformer.h.0.attn.c_attn.weight": "model-00001-of-00007.safetensors",
"transformer.h.0.attn.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.0.attn.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.0.ln_1.bias": "model-00001-of-00007.safetensors",
"transformer.h.0.ln_1.weight": "model-00001-of-00007.safetensors",
"transformer.h.0.ln_2.bias": "model-00001-of-00007.safetensors",
"transformer.h.0.ln_2.weight": "model-00001-of-00007.safetensors",
"transformer.h.0.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
"transformer.h.0.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
"transformer.h.0.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.0.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.1.attn.c_attn.bias": "model-00001-of-00007.safetensors",
"transformer.h.1.attn.c_attn.weight": "model-00001-of-00007.safetensors",
"transformer.h.1.attn.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.1.attn.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.1.ln_1.bias": "model-00001-of-00007.safetensors",
"transformer.h.1.ln_1.weight": "model-00001-of-00007.safetensors",
"transformer.h.1.ln_2.bias": "model-00001-of-00007.safetensors",
"transformer.h.1.ln_2.weight": "model-00001-of-00007.safetensors",
"transformer.h.1.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
"transformer.h.1.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
"transformer.h.1.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.1.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.10.attn.c_attn.bias": "model-00002-of-00007.safetensors",
"transformer.h.10.attn.c_attn.weight": "model-00002-of-00007.safetensors",
"transformer.h.10.attn.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.10.attn.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.10.ln_1.bias": "model-00002-of-00007.safetensors",
"transformer.h.10.ln_1.weight": "model-00002-of-00007.safetensors",
"transformer.h.10.ln_2.bias": "model-00002-of-00007.safetensors",
"transformer.h.10.ln_2.weight": "model-00002-of-00007.safetensors",
"transformer.h.10.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
"transformer.h.10.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
"transformer.h.10.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.10.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.11.attn.c_attn.bias": "model-00002-of-00007.safetensors",
"transformer.h.11.attn.c_attn.weight": "model-00002-of-00007.safetensors",
"transformer.h.11.attn.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.11.attn.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.11.ln_1.bias": "model-00002-of-00007.safetensors",
"transformer.h.11.ln_1.weight": "model-00002-of-00007.safetensors",
"transformer.h.11.ln_2.bias": "model-00002-of-00007.safetensors",
"transformer.h.11.ln_2.weight": "model-00002-of-00007.safetensors",
"transformer.h.11.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
"transformer.h.11.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
"transformer.h.11.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.11.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.12.attn.c_attn.bias": "model-00002-of-00007.safetensors",
"transformer.h.12.attn.c_attn.weight": "model-00002-of-00007.safetensors",
"transformer.h.12.attn.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.12.attn.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.12.ln_1.bias": "model-00002-of-00007.safetensors",
"transformer.h.12.ln_1.weight": "model-00002-of-00007.safetensors",
"transformer.h.12.ln_2.bias": "model-00003-of-00007.safetensors",
"transformer.h.12.ln_2.weight": "model-00003-of-00007.safetensors",
"transformer.h.12.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
"transformer.h.12.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
"transformer.h.12.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.12.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.13.attn.c_attn.bias": "model-00003-of-00007.safetensors",
"transformer.h.13.attn.c_attn.weight": "model-00003-of-00007.safetensors",
"transformer.h.13.attn.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.13.attn.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.13.ln_1.bias": "model-00003-of-00007.safetensors",
"transformer.h.13.ln_1.weight": "model-00003-of-00007.safetensors",
"transformer.h.13.ln_2.bias": "model-00003-of-00007.safetensors",
"transformer.h.13.ln_2.weight": "model-00003-of-00007.safetensors",
"transformer.h.13.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
"transformer.h.13.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
"transformer.h.13.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.13.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.14.attn.c_attn.bias": "model-00003-of-00007.safetensors",
"transformer.h.14.attn.c_attn.weight": "model-00003-of-00007.safetensors",
"transformer.h.14.attn.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.14.attn.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.14.ln_1.bias": "model-00003-of-00007.safetensors",
"transformer.h.14.ln_1.weight": "model-00003-of-00007.safetensors",
"transformer.h.14.ln_2.bias": "model-00003-of-00007.safetensors",
"transformer.h.14.ln_2.weight": "model-00003-of-00007.safetensors",
"transformer.h.14.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
"transformer.h.14.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
"transformer.h.14.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.14.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.15.attn.c_attn.bias": "model-00003-of-00007.safetensors",
"transformer.h.15.attn.c_attn.weight": "model-00003-of-00007.safetensors",
"transformer.h.15.attn.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.15.attn.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.15.ln_1.bias": "model-00003-of-00007.safetensors",
"transformer.h.15.ln_1.weight": "model-00003-of-00007.safetensors",
"transformer.h.15.ln_2.bias": "model-00003-of-00007.safetensors",
"transformer.h.15.ln_2.weight": "model-00003-of-00007.safetensors",
"transformer.h.15.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
"transformer.h.15.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
"transformer.h.15.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.15.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.16.attn.c_attn.bias": "model-00003-of-00007.safetensors",
"transformer.h.16.attn.c_attn.weight": "model-00003-of-00007.safetensors",
"transformer.h.16.attn.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.16.attn.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.16.ln_1.bias": "model-00003-of-00007.safetensors",
"transformer.h.16.ln_1.weight": "model-00003-of-00007.safetensors",
"transformer.h.16.ln_2.bias": "model-00003-of-00007.safetensors",
"transformer.h.16.ln_2.weight": "model-00003-of-00007.safetensors",
"transformer.h.16.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
"transformer.h.16.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
"transformer.h.16.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.16.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.17.attn.c_attn.bias": "model-00003-of-00007.safetensors",
"transformer.h.17.attn.c_attn.weight": "model-00003-of-00007.safetensors",
"transformer.h.17.attn.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.17.attn.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.17.ln_1.bias": "model-00003-of-00007.safetensors",
"transformer.h.17.ln_1.weight": "model-00003-of-00007.safetensors",
"transformer.h.17.ln_2.bias": "model-00003-of-00007.safetensors",
"transformer.h.17.ln_2.weight": "model-00003-of-00007.safetensors",
"transformer.h.17.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
"transformer.h.17.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
"transformer.h.17.mlp.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.17.mlp.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.18.attn.c_attn.bias": "model-00003-of-00007.safetensors",
"transformer.h.18.attn.c_attn.weight": "model-00003-of-00007.safetensors",
"transformer.h.18.attn.c_proj.bias": "model-00003-of-00007.safetensors",
"transformer.h.18.attn.c_proj.weight": "model-00003-of-00007.safetensors",
"transformer.h.18.ln_1.bias": "model-00003-of-00007.safetensors",
"transformer.h.18.ln_1.weight": "model-00003-of-00007.safetensors",
"transformer.h.18.ln_2.bias": "model-00003-of-00007.safetensors",
"transformer.h.18.ln_2.weight": "model-00003-of-00007.safetensors",
"transformer.h.18.mlp.c_fc.bias": "model-00003-of-00007.safetensors",
"transformer.h.18.mlp.c_fc.weight": "model-00003-of-00007.safetensors",
"transformer.h.18.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.18.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.19.attn.c_attn.bias": "model-00004-of-00007.safetensors",
"transformer.h.19.attn.c_attn.weight": "model-00004-of-00007.safetensors",
"transformer.h.19.attn.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.19.attn.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.19.ln_1.bias": "model-00004-of-00007.safetensors",
"transformer.h.19.ln_1.weight": "model-00004-of-00007.safetensors",
"transformer.h.19.ln_2.bias": "model-00004-of-00007.safetensors",
"transformer.h.19.ln_2.weight": "model-00004-of-00007.safetensors",
"transformer.h.19.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
"transformer.h.19.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
"transformer.h.19.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.19.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.2.attn.c_attn.bias": "model-00001-of-00007.safetensors",
"transformer.h.2.attn.c_attn.weight": "model-00001-of-00007.safetensors",
"transformer.h.2.attn.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.2.attn.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.2.ln_1.bias": "model-00001-of-00007.safetensors",
"transformer.h.2.ln_1.weight": "model-00001-of-00007.safetensors",
"transformer.h.2.ln_2.bias": "model-00001-of-00007.safetensors",
"transformer.h.2.ln_2.weight": "model-00001-of-00007.safetensors",
"transformer.h.2.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
"transformer.h.2.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
"transformer.h.2.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.2.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.20.attn.c_attn.bias": "model-00004-of-00007.safetensors",
"transformer.h.20.attn.c_attn.weight": "model-00004-of-00007.safetensors",
"transformer.h.20.attn.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.20.attn.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.20.ln_1.bias": "model-00004-of-00007.safetensors",
"transformer.h.20.ln_1.weight": "model-00004-of-00007.safetensors",
"transformer.h.20.ln_2.bias": "model-00004-of-00007.safetensors",
"transformer.h.20.ln_2.weight": "model-00004-of-00007.safetensors",
"transformer.h.20.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
"transformer.h.20.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
"transformer.h.20.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.20.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.21.attn.c_attn.bias": "model-00004-of-00007.safetensors",
"transformer.h.21.attn.c_attn.weight": "model-00004-of-00007.safetensors",
"transformer.h.21.attn.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.21.attn.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.21.ln_1.bias": "model-00004-of-00007.safetensors",
"transformer.h.21.ln_1.weight": "model-00004-of-00007.safetensors",
"transformer.h.21.ln_2.bias": "model-00004-of-00007.safetensors",
"transformer.h.21.ln_2.weight": "model-00004-of-00007.safetensors",
"transformer.h.21.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
"transformer.h.21.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
"transformer.h.21.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.21.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.22.attn.c_attn.bias": "model-00004-of-00007.safetensors",
"transformer.h.22.attn.c_attn.weight": "model-00004-of-00007.safetensors",
"transformer.h.22.attn.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.22.attn.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.22.ln_1.bias": "model-00004-of-00007.safetensors",
"transformer.h.22.ln_1.weight": "model-00004-of-00007.safetensors",
"transformer.h.22.ln_2.bias": "model-00004-of-00007.safetensors",
"transformer.h.22.ln_2.weight": "model-00004-of-00007.safetensors",
"transformer.h.22.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
"transformer.h.22.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
"transformer.h.22.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.22.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.23.attn.c_attn.bias": "model-00004-of-00007.safetensors",
"transformer.h.23.attn.c_attn.weight": "model-00004-of-00007.safetensors",
"transformer.h.23.attn.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.23.attn.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.23.ln_1.bias": "model-00004-of-00007.safetensors",
"transformer.h.23.ln_1.weight": "model-00004-of-00007.safetensors",
"transformer.h.23.ln_2.bias": "model-00004-of-00007.safetensors",
"transformer.h.23.ln_2.weight": "model-00004-of-00007.safetensors",
"transformer.h.23.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
"transformer.h.23.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
"transformer.h.23.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.23.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.24.attn.c_attn.bias": "model-00004-of-00007.safetensors",
"transformer.h.24.attn.c_attn.weight": "model-00004-of-00007.safetensors",
"transformer.h.24.attn.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.24.attn.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.24.ln_1.bias": "model-00004-of-00007.safetensors",
"transformer.h.24.ln_1.weight": "model-00004-of-00007.safetensors",
"transformer.h.24.ln_2.bias": "model-00004-of-00007.safetensors",
"transformer.h.24.ln_2.weight": "model-00004-of-00007.safetensors",
"transformer.h.24.mlp.c_fc.bias": "model-00004-of-00007.safetensors",
"transformer.h.24.mlp.c_fc.weight": "model-00004-of-00007.safetensors",
"transformer.h.24.mlp.c_proj.bias": "model-00004-of-00007.safetensors",
"transformer.h.24.mlp.c_proj.weight": "model-00004-of-00007.safetensors",
"transformer.h.25.attn.c_attn.bias": "model-00004-of-00007.safetensors",
"transformer.h.25.attn.c_attn.weight": "model-00004-of-00007.safetensors",
"transformer.h.25.attn.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.25.attn.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.25.ln_1.bias": "model-00004-of-00007.safetensors",
"transformer.h.25.ln_1.weight": "model-00004-of-00007.safetensors",
"transformer.h.25.ln_2.bias": "model-00005-of-00007.safetensors",
"transformer.h.25.ln_2.weight": "model-00005-of-00007.safetensors",
"transformer.h.25.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
"transformer.h.25.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
"transformer.h.25.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.25.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.26.attn.c_attn.bias": "model-00005-of-00007.safetensors",
"transformer.h.26.attn.c_attn.weight": "model-00005-of-00007.safetensors",
"transformer.h.26.attn.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.26.attn.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.26.ln_1.bias": "model-00005-of-00007.safetensors",
"transformer.h.26.ln_1.weight": "model-00005-of-00007.safetensors",
"transformer.h.26.ln_2.bias": "model-00005-of-00007.safetensors",
"transformer.h.26.ln_2.weight": "model-00005-of-00007.safetensors",
"transformer.h.26.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
"transformer.h.26.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
"transformer.h.26.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.26.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.27.attn.c_attn.bias": "model-00005-of-00007.safetensors",
"transformer.h.27.attn.c_attn.weight": "model-00005-of-00007.safetensors",
"transformer.h.27.attn.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.27.attn.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.27.ln_1.bias": "model-00005-of-00007.safetensors",
"transformer.h.27.ln_1.weight": "model-00005-of-00007.safetensors",
"transformer.h.27.ln_2.bias": "model-00005-of-00007.safetensors",
"transformer.h.27.ln_2.weight": "model-00005-of-00007.safetensors",
"transformer.h.27.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
"transformer.h.27.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
"transformer.h.27.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.27.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.28.attn.c_attn.bias": "model-00005-of-00007.safetensors",
"transformer.h.28.attn.c_attn.weight": "model-00005-of-00007.safetensors",
"transformer.h.28.attn.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.28.attn.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.28.ln_1.bias": "model-00005-of-00007.safetensors",
"transformer.h.28.ln_1.weight": "model-00005-of-00007.safetensors",
"transformer.h.28.ln_2.bias": "model-00005-of-00007.safetensors",
"transformer.h.28.ln_2.weight": "model-00005-of-00007.safetensors",
"transformer.h.28.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
"transformer.h.28.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
"transformer.h.28.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.28.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.29.attn.c_attn.bias": "model-00005-of-00007.safetensors",
"transformer.h.29.attn.c_attn.weight": "model-00005-of-00007.safetensors",
"transformer.h.29.attn.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.29.attn.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.29.ln_1.bias": "model-00005-of-00007.safetensors",
"transformer.h.29.ln_1.weight": "model-00005-of-00007.safetensors",
"transformer.h.29.ln_2.bias": "model-00005-of-00007.safetensors",
"transformer.h.29.ln_2.weight": "model-00005-of-00007.safetensors",
"transformer.h.29.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
"transformer.h.29.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
"transformer.h.29.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.29.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.3.attn.c_attn.bias": "model-00001-of-00007.safetensors",
"transformer.h.3.attn.c_attn.weight": "model-00001-of-00007.safetensors",
"transformer.h.3.attn.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.3.attn.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.3.ln_1.bias": "model-00001-of-00007.safetensors",
"transformer.h.3.ln_1.weight": "model-00001-of-00007.safetensors",
"transformer.h.3.ln_2.bias": "model-00001-of-00007.safetensors",
"transformer.h.3.ln_2.weight": "model-00001-of-00007.safetensors",
"transformer.h.3.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
"transformer.h.3.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
"transformer.h.3.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.3.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.30.attn.c_attn.bias": "model-00005-of-00007.safetensors",
"transformer.h.30.attn.c_attn.weight": "model-00005-of-00007.safetensors",
"transformer.h.30.attn.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.30.attn.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.30.ln_1.bias": "model-00005-of-00007.safetensors",
"transformer.h.30.ln_1.weight": "model-00005-of-00007.safetensors",
"transformer.h.30.ln_2.bias": "model-00005-of-00007.safetensors",
"transformer.h.30.ln_2.weight": "model-00005-of-00007.safetensors",
"transformer.h.30.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
"transformer.h.30.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
"transformer.h.30.mlp.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.30.mlp.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.31.attn.c_attn.bias": "model-00005-of-00007.safetensors",
"transformer.h.31.attn.c_attn.weight": "model-00005-of-00007.safetensors",
"transformer.h.31.attn.c_proj.bias": "model-00005-of-00007.safetensors",
"transformer.h.31.attn.c_proj.weight": "model-00005-of-00007.safetensors",
"transformer.h.31.ln_1.bias": "model-00005-of-00007.safetensors",
"transformer.h.31.ln_1.weight": "model-00005-of-00007.safetensors",
"transformer.h.31.ln_2.bias": "model-00005-of-00007.safetensors",
"transformer.h.31.ln_2.weight": "model-00005-of-00007.safetensors",
"transformer.h.31.mlp.c_fc.bias": "model-00005-of-00007.safetensors",
"transformer.h.31.mlp.c_fc.weight": "model-00005-of-00007.safetensors",
"transformer.h.31.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.31.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.32.attn.c_attn.bias": "model-00006-of-00007.safetensors",
"transformer.h.32.attn.c_attn.weight": "model-00006-of-00007.safetensors",
"transformer.h.32.attn.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.32.attn.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.32.ln_1.bias": "model-00006-of-00007.safetensors",
"transformer.h.32.ln_1.weight": "model-00006-of-00007.safetensors",
"transformer.h.32.ln_2.bias": "model-00006-of-00007.safetensors",
"transformer.h.32.ln_2.weight": "model-00006-of-00007.safetensors",
"transformer.h.32.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
"transformer.h.32.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
"transformer.h.32.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.32.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.33.attn.c_attn.bias": "model-00006-of-00007.safetensors",
"transformer.h.33.attn.c_attn.weight": "model-00006-of-00007.safetensors",
"transformer.h.33.attn.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.33.attn.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.33.ln_1.bias": "model-00006-of-00007.safetensors",
"transformer.h.33.ln_1.weight": "model-00006-of-00007.safetensors",
"transformer.h.33.ln_2.bias": "model-00006-of-00007.safetensors",
"transformer.h.33.ln_2.weight": "model-00006-of-00007.safetensors",
"transformer.h.33.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
"transformer.h.33.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
"transformer.h.33.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.33.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.34.attn.c_attn.bias": "model-00006-of-00007.safetensors",
"transformer.h.34.attn.c_attn.weight": "model-00006-of-00007.safetensors",
"transformer.h.34.attn.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.34.attn.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.34.ln_1.bias": "model-00006-of-00007.safetensors",
"transformer.h.34.ln_1.weight": "model-00006-of-00007.safetensors",
"transformer.h.34.ln_2.bias": "model-00006-of-00007.safetensors",
"transformer.h.34.ln_2.weight": "model-00006-of-00007.safetensors",
"transformer.h.34.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
"transformer.h.34.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
"transformer.h.34.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.34.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.35.attn.c_attn.bias": "model-00006-of-00007.safetensors",
"transformer.h.35.attn.c_attn.weight": "model-00006-of-00007.safetensors",
"transformer.h.35.attn.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.35.attn.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.35.ln_1.bias": "model-00006-of-00007.safetensors",
"transformer.h.35.ln_1.weight": "model-00006-of-00007.safetensors",
"transformer.h.35.ln_2.bias": "model-00006-of-00007.safetensors",
"transformer.h.35.ln_2.weight": "model-00006-of-00007.safetensors",
"transformer.h.35.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
"transformer.h.35.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
"transformer.h.35.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.35.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.36.attn.c_attn.bias": "model-00006-of-00007.safetensors",
"transformer.h.36.attn.c_attn.weight": "model-00006-of-00007.safetensors",
"transformer.h.36.attn.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.36.attn.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.36.ln_1.bias": "model-00006-of-00007.safetensors",
"transformer.h.36.ln_1.weight": "model-00006-of-00007.safetensors",
"transformer.h.36.ln_2.bias": "model-00006-of-00007.safetensors",
"transformer.h.36.ln_2.weight": "model-00006-of-00007.safetensors",
"transformer.h.36.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
"transformer.h.36.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
"transformer.h.36.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.36.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.37.attn.c_attn.bias": "model-00006-of-00007.safetensors",
"transformer.h.37.attn.c_attn.weight": "model-00006-of-00007.safetensors",
"transformer.h.37.attn.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.37.attn.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.37.ln_1.bias": "model-00006-of-00007.safetensors",
"transformer.h.37.ln_1.weight": "model-00006-of-00007.safetensors",
"transformer.h.37.ln_2.bias": "model-00006-of-00007.safetensors",
"transformer.h.37.ln_2.weight": "model-00006-of-00007.safetensors",
"transformer.h.37.mlp.c_fc.bias": "model-00006-of-00007.safetensors",
"transformer.h.37.mlp.c_fc.weight": "model-00006-of-00007.safetensors",
"transformer.h.37.mlp.c_proj.bias": "model-00006-of-00007.safetensors",
"transformer.h.37.mlp.c_proj.weight": "model-00006-of-00007.safetensors",
"transformer.h.38.attn.c_attn.bias": "model-00006-of-00007.safetensors",
"transformer.h.38.attn.c_attn.weight": "model-00006-of-00007.safetensors",
"transformer.h.38.attn.c_proj.bias": "model-00007-of-00007.safetensors",
"transformer.h.38.attn.c_proj.weight": "model-00007-of-00007.safetensors",
"transformer.h.38.ln_1.bias": "model-00006-of-00007.safetensors",
"transformer.h.38.ln_1.weight": "model-00006-of-00007.safetensors",
"transformer.h.38.ln_2.bias": "model-00007-of-00007.safetensors",
"transformer.h.38.ln_2.weight": "model-00007-of-00007.safetensors",
"transformer.h.38.mlp.c_fc.bias": "model-00007-of-00007.safetensors",
"transformer.h.38.mlp.c_fc.weight": "model-00007-of-00007.safetensors",
"transformer.h.38.mlp.c_proj.bias": "model-00007-of-00007.safetensors",
"transformer.h.38.mlp.c_proj.weight": "model-00007-of-00007.safetensors",
"transformer.h.39.attn.c_attn.bias": "model-00007-of-00007.safetensors",
"transformer.h.39.attn.c_attn.weight": "model-00007-of-00007.safetensors",
"transformer.h.39.attn.c_proj.bias": "model-00007-of-00007.safetensors",
"transformer.h.39.attn.c_proj.weight": "model-00007-of-00007.safetensors",
"transformer.h.39.ln_1.bias": "model-00007-of-00007.safetensors",
"transformer.h.39.ln_1.weight": "model-00007-of-00007.safetensors",
"transformer.h.39.ln_2.bias": "model-00007-of-00007.safetensors",
"transformer.h.39.ln_2.weight": "model-00007-of-00007.safetensors",
"transformer.h.39.mlp.c_fc.bias": "model-00007-of-00007.safetensors",
"transformer.h.39.mlp.c_fc.weight": "model-00007-of-00007.safetensors",
"transformer.h.39.mlp.c_proj.bias": "model-00007-of-00007.safetensors",
"transformer.h.39.mlp.c_proj.weight": "model-00007-of-00007.safetensors",
"transformer.h.4.attn.c_attn.bias": "model-00001-of-00007.safetensors",
"transformer.h.4.attn.c_attn.weight": "model-00001-of-00007.safetensors",
"transformer.h.4.attn.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.4.attn.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.4.ln_1.bias": "model-00001-of-00007.safetensors",
"transformer.h.4.ln_1.weight": "model-00001-of-00007.safetensors",
"transformer.h.4.ln_2.bias": "model-00001-of-00007.safetensors",
"transformer.h.4.ln_2.weight": "model-00001-of-00007.safetensors",
"transformer.h.4.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
"transformer.h.4.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
"transformer.h.4.mlp.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.4.mlp.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.5.attn.c_attn.bias": "model-00001-of-00007.safetensors",
"transformer.h.5.attn.c_attn.weight": "model-00001-of-00007.safetensors",
"transformer.h.5.attn.c_proj.bias": "model-00001-of-00007.safetensors",
"transformer.h.5.attn.c_proj.weight": "model-00001-of-00007.safetensors",
"transformer.h.5.ln_1.bias": "model-00001-of-00007.safetensors",
"transformer.h.5.ln_1.weight": "model-00001-of-00007.safetensors",
"transformer.h.5.ln_2.bias": "model-00001-of-00007.safetensors",
"transformer.h.5.ln_2.weight": "model-00001-of-00007.safetensors",
"transformer.h.5.mlp.c_fc.bias": "model-00001-of-00007.safetensors",
"transformer.h.5.mlp.c_fc.weight": "model-00001-of-00007.safetensors",
"transformer.h.5.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.5.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.6.attn.c_attn.bias": "model-00002-of-00007.safetensors",
"transformer.h.6.attn.c_attn.weight": "model-00002-of-00007.safetensors",
"transformer.h.6.attn.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.6.attn.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.6.ln_1.bias": "model-00002-of-00007.safetensors",
"transformer.h.6.ln_1.weight": "model-00002-of-00007.safetensors",
"transformer.h.6.ln_2.bias": "model-00002-of-00007.safetensors",
"transformer.h.6.ln_2.weight": "model-00002-of-00007.safetensors",
"transformer.h.6.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
"transformer.h.6.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
"transformer.h.6.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.6.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.7.attn.c_attn.bias": "model-00002-of-00007.safetensors",
"transformer.h.7.attn.c_attn.weight": "model-00002-of-00007.safetensors",
"transformer.h.7.attn.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.7.attn.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.7.ln_1.bias": "model-00002-of-00007.safetensors",
"transformer.h.7.ln_1.weight": "model-00002-of-00007.safetensors",
"transformer.h.7.ln_2.bias": "model-00002-of-00007.safetensors",
"transformer.h.7.ln_2.weight": "model-00002-of-00007.safetensors",
"transformer.h.7.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
"transformer.h.7.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
"transformer.h.7.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.7.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.8.attn.c_attn.bias": "model-00002-of-00007.safetensors",
"transformer.h.8.attn.c_attn.weight": "model-00002-of-00007.safetensors",
"transformer.h.8.attn.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.8.attn.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.8.ln_1.bias": "model-00002-of-00007.safetensors",
"transformer.h.8.ln_1.weight": "model-00002-of-00007.safetensors",
"transformer.h.8.ln_2.bias": "model-00002-of-00007.safetensors",
"transformer.h.8.ln_2.weight": "model-00002-of-00007.safetensors",
"transformer.h.8.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
"transformer.h.8.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
"transformer.h.8.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.8.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.9.attn.c_attn.bias": "model-00002-of-00007.safetensors",
"transformer.h.9.attn.c_attn.weight": "model-00002-of-00007.safetensors",
"transformer.h.9.attn.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.9.attn.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.h.9.ln_1.bias": "model-00002-of-00007.safetensors",
"transformer.h.9.ln_1.weight": "model-00002-of-00007.safetensors",
"transformer.h.9.ln_2.bias": "model-00002-of-00007.safetensors",
"transformer.h.9.ln_2.weight": "model-00002-of-00007.safetensors",
"transformer.h.9.mlp.c_fc.bias": "model-00002-of-00007.safetensors",
"transformer.h.9.mlp.c_fc.weight": "model-00002-of-00007.safetensors",
"transformer.h.9.mlp.c_proj.bias": "model-00002-of-00007.safetensors",
"transformer.h.9.mlp.c_proj.weight": "model-00002-of-00007.safetensors",
"transformer.ln_f.bias": "model-00007-of-00007.safetensors",
"transformer.ln_f.weight": "model-00007-of-00007.safetensors",
"transformer.wpe.weight": "model-00001-of-00007.safetensors",
"transformer.wte.weight": "model-00001-of-00007.safetensors"
}
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3e3be7ef755097a936f6134cd543ccb3ac80f641c882421dccb7906519f524b1
size 9904379303

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b7b8a5ee8e5c9141b9aff700b8c9ae1ce7ac8fc0169faa39bb073493d673fdfa
size 9860464915

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f4c1c61abafb7d59a3a0d44bd8c8a8901cbfc67f40b61c5da1b08e5823e66326
size 9854246167

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7cc8d33c7b57f58ad31d1a0aef3f8e7f00fd3259a3278654315f4a26e45ad59d
size 9860464979

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d2b2177e79d0efd24ceca9678b5aa47fef3bbc262f23fae42834e65510313d72
size 9854246167

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e054051316d386683f3577f50551bdb3a6af3e13d3f2657fb87d35ec9ba478ba
size 9860464979

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e6eb36a714f29b6f8004f17ac3c96407595cd5d2f580c05a23f43b095afdf26e
size 2875719771

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b217742d53295e992d923051a73d629425169b9088af65b76724d3b5195a3ac9
size 36278

3
special_tokens_map.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0823292e24ea07b89317e9ede9d08da2a1b6c014290c06908a7ad04f1efd6719
size 532

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9af07a3123a1f4d75dcb85fbdc4c62f9b7873d23fa39c449d2240c3e33eb3ab5
size 2057423

3
tokenizer_config.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4d8a576be1b7a37446e07a524202302c08ddc116e68b2e042d9fe4eaef46192e
size 717

3
vocab.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:20175afb9f164fad4829aca2279f8df7eeff1e2e3f671378aaa287a740aff09f
size 776993