初始化项目,由ModelHub XC社区提供模型
Model: Locutusque/OpenCerebrum-1.0-7b-DPO Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
171
README.md
Normal file
171
README.md
Normal file
@@ -0,0 +1,171 @@
|
||||
---
|
||||
language:
|
||||
- en
|
||||
license: apache-2.0
|
||||
tags:
|
||||
- open-source
|
||||
- code
|
||||
- math
|
||||
- chemistry
|
||||
- biology
|
||||
- text-generation
|
||||
- question-answering
|
||||
datasets:
|
||||
- Locutusque/OpenCerebrum-dpo
|
||||
pipeline_tag: text-generation
|
||||
model-index:
|
||||
- name: OpenCerebrum-1.0-7b-DPO
|
||||
results:
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: AI2 Reasoning Challenge (25-Shot)
|
||||
type: ai2_arc
|
||||
config: ARC-Challenge
|
||||
split: test
|
||||
args:
|
||||
num_few_shot: 25
|
||||
metrics:
|
||||
- type: acc_norm
|
||||
value: 62.71
|
||||
name: normalized accuracy
|
||||
source:
|
||||
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/OpenCerebrum-1.0-7b-DPO
|
||||
name: Open LLM Leaderboard
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: HellaSwag (10-Shot)
|
||||
type: hellaswag
|
||||
split: validation
|
||||
args:
|
||||
num_few_shot: 10
|
||||
metrics:
|
||||
- type: acc_norm
|
||||
value: 84.33
|
||||
name: normalized accuracy
|
||||
source:
|
||||
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/OpenCerebrum-1.0-7b-DPO
|
||||
name: Open LLM Leaderboard
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: MMLU (5-Shot)
|
||||
type: cais/mmlu
|
||||
config: all
|
||||
split: test
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc
|
||||
value: 62.59
|
||||
name: accuracy
|
||||
source:
|
||||
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/OpenCerebrum-1.0-7b-DPO
|
||||
name: Open LLM Leaderboard
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: TruthfulQA (0-shot)
|
||||
type: truthful_qa
|
||||
config: multiple_choice
|
||||
split: validation
|
||||
args:
|
||||
num_few_shot: 0
|
||||
metrics:
|
||||
- type: mc2
|
||||
value: 44.91
|
||||
source:
|
||||
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/OpenCerebrum-1.0-7b-DPO
|
||||
name: Open LLM Leaderboard
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: Winogrande (5-shot)
|
||||
type: winogrande
|
||||
config: winogrande_xl
|
||||
split: validation
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc
|
||||
value: 80.11
|
||||
name: accuracy
|
||||
source:
|
||||
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/OpenCerebrum-1.0-7b-DPO
|
||||
name: Open LLM Leaderboard
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: GSM8k (5-shot)
|
||||
type: gsm8k
|
||||
config: main
|
||||
split: test
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc
|
||||
value: 42.0
|
||||
name: accuracy
|
||||
source:
|
||||
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/OpenCerebrum-1.0-7b-DPO
|
||||
name: Open LLM Leaderboard
|
||||
---
|
||||
|
||||
# OpenCerebrum-1.0-7B-DPO
|
||||
|
||||
OpenCerebrum-1.0-7B-DPO is an open-source language model fine-tuned from the alpindale/Mistral-7B-v0.2-hf base model on a diverse dataset, with the aim of replicating the capabilities of Aether Research's proprietary Cerebrum model.
|
||||
|
||||
The model was fine-tuned on approximately 21,000 examples across 6 datasets spanning coding, math, science, reasoning, and general instruction-following. The goal was to assemble public datasets that could help the model achieve strong performance on benchmarks where Cerebrum excels.
|
||||
|
||||
I used the ChatML prompt format to train this model.
|
||||
|
||||
## Model Details
|
||||
|
||||
- **Base Model:** alpindale/Mistral-7B-v0.2-hf
|
||||
- **Parameters:** 7 billion
|
||||
- **Fine-Tuning Dataset Size:** ~21,000 examples
|
||||
- **Fine-Tuning Data:** Amalgamation of 6 public datasets
|
||||
- **Language:** English
|
||||
- **License:** Apache 2.0
|
||||
|
||||
## Quants
|
||||
|
||||
- **ExLlamaV2:** https://huggingface.co/bartowski/OpenCerebrum-1.0-7b-DPO-exl2
|
||||
- **GGUF:** https://huggingface.co/bartowski/OpenCerebrum-1.0-7b-DPO-GGUF
|
||||
- **AWQ:** https://huggingface.co/solidrust/OpenCerebrum-1.0-7b-DPO-AWQ
|
||||
|
||||
## Intended Use
|
||||
|
||||
OpenCerebrum-1.0-7B-DPO is intended to be a powerful open-source model for coding, math, science, and general question-answering and text generation tasks. Its diverse fine-tuning data aims to equip it with broad knowledge and reasoning capabilities.
|
||||
|
||||
However, as an open-source replica trained on public data rather than the proprietary data used for the original Cerebrum, it may not match Cerebrum's full performance. Additionally, biases and limitations of the fine-tuning data may be reflected in the model's outputs.
|
||||
|
||||
## Limitations and Biases
|
||||
|
||||
- The model may have biases and limitations inherited from its fine-tuning datasets. Thorough testing is needed to characterize these.
|
||||
- With 21,000 training examples, the fine-tuning data is still limited compared to the proprietary Cerebrum data.
|
||||
- As the model is based on a 7B parameter model, it has computational and memory constraints compared to larger models.
|
||||
|
||||
## Training Details
|
||||
|
||||
The model was fine-tuned on 6 public datasets, totaling approximately 21,000 examples (see the `datasets` entry in this card's metadata, `Locutusque/OpenCerebrum-dpo`, which presumably contains the combined data). In the future, the fine-tuning dataset may be condensed to more closely match the ~500-example dataset reputedly used for the original Cerebrum model.
|
||||
## [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
|
||||
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_Locutusque__OpenCerebrum-1.0-7b-DPO).
|
||||
|
||||
| Metric |Value|
|
||||
|---------------------------------|----:|
|
||||
|Avg. |62.78|
|
||||
|AI2 Reasoning Challenge (25-Shot)|62.71|
|
||||
|HellaSwag (10-Shot) |84.33|
|
||||
|MMLU (5-Shot) |62.59|
|
||||
|TruthfulQA (0-shot) |44.91|
|
||||
|Winogrande (5-shot) |80.11|
|
||||
|GSM8k (5-shot) |42.00|
|
||||
|
||||
26
config.json
Normal file
26
config.json
Normal file
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"_name_or_path": "Locutusque/OpenCerebrum-1.0-7b-SFT",
|
||||
"architectures": [
|
||||
"MistralForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 32768,
|
||||
"model_type": "mistral",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 32,
|
||||
"num_key_value_heads": 8,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_theta": 1000000.0,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.39.1",
|
||||
"use_cache": true,
|
||||
"vocab_size": 32000
|
||||
}
|
||||
6
generation_config.json
Normal file
6
generation_config.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"transformers_version": "4.39.1"
|
||||
}
|
||||
3
model-00001-of-00008.safetensors
Normal file
3
model-00001-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e70084a8fccfc364aa3e83d9d172c0bdd82e0a3769676c7dbc40716b7bf9e1ea
|
||||
size 1889587040
|
||||
3
model-00002-of-00008.safetensors
Normal file
3
model-00002-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f11be2c8f90e696714b8c4220b5bf10aeeccbec00f556bf7f868fad36167ce12
|
||||
size 1946243936
|
||||
3
model-00003-of-00008.safetensors
Normal file
3
model-00003-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a66e4410eac87294957d08e6e6851b45bb030c414f4512ec5eaba55fef9a5aca
|
||||
size 1979781432
|
||||
3
model-00004-of-00008.safetensors
Normal file
3
model-00004-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ccfb93a1f1ed7141875c93c8f30ef8640ca6d30e5d30531d306c38de1adc0e3e
|
||||
size 1946243984
|
||||
3
model-00005-of-00008.safetensors
Normal file
3
model-00005-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d659421146a09270aa5c79a8a310d3a07a3f83e512e478206e3cf33893991c18
|
||||
size 1979781448
|
||||
3
model-00006-of-00008.safetensors
Normal file
3
model-00006-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a1249878fa49b8744386358f5792dddba6df8fe21ec58fa34bd1f89c0b0ccd88
|
||||
size 1946243984
|
||||
3
model-00007-of-00008.safetensors
Normal file
3
model-00007-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:13ed0ad3ffdba2d709cb1be2ec999f7c0bc0f1211c7d79c2c9eba95d2d429641
|
||||
size 1979781448
|
||||
3
model-00008-of-00008.safetensors
Normal file
3
model-00008-of-00008.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:46d93fa4a9a61987f419de1b888e1d5b7413573e72e6bdae40d4e838b7865219
|
||||
size 815834680
|
||||
298
model.safetensors.index.json
Normal file
298
model.safetensors.index.json
Normal file
@@ -0,0 +1,298 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 14483464192
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00008-of-00008.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00008-of-00008.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
|
||||
"model.norm.weight": "model-00008-of-00008.safetensors"
|
||||
}
|
||||
}
|
||||
30
special_tokens_map.json
Normal file
30
special_tokens_map.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"unk_token": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
91122
tokenizer.json
Normal file
91122
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
tokenizer.model
(Stored with Git LFS)
Normal file
BIN
tokenizer.model
(Stored with Git LFS)
Normal file
Binary file not shown.
42
tokenizer_config.json
Normal file
42
tokenizer_config.json
Normal file
@@ -0,0 +1,42 @@
|
||||
{
|
||||
"add_bos_token": true,
|
||||
"add_eos_token": false,
|
||||
"add_prefix_space": true,
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"bos_token": "<s>",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "</s>",
|
||||
"legacy": true,
|
||||
"model_max_length": 1000000000000000019884624838656,
|
||||
"pad_token": "</s>",
|
||||
"sp_model_kwargs": {},
|
||||
"spaces_between_special_tokens": false,
|
||||
"tokenizer_class": "LlamaTokenizer",
|
||||
"unk_token": "<unk>",
|
||||
"use_default_system_prompt": false
|
||||
}
|
||||
Reference in New Issue
Block a user