Initialize project; model provided by the ModelHub XC community
Model: Locutusque/hyperion-medium-preview
Source: Original Platform
.gitattributes  vendored  Normal file  (35 lines)
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
README.md  Normal file  (189 lines)
@@ -0,0 +1,189 @@
---
language:
- en
license: apache-2.0
library_name: transformers
model-index:
- name: hyperion-medium-preview
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: AI2 Reasoning Challenge (25-Shot)
      type: ai2_arc
      config: ARC-Challenge
      split: test
      args:
        num_few_shot: 25
    metrics:
    - type: acc_norm
      value: 60.67
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/hyperion-medium-preview
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: HellaSwag (10-Shot)
      type: hellaswag
      split: validation
      args:
        num_few_shot: 10
    metrics:
    - type: acc_norm
      value: 83.67
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/hyperion-medium-preview
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU (5-Shot)
      type: cais/mmlu
      config: all
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 63.73
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/hyperion-medium-preview
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: TruthfulQA (0-shot)
      type: truthful_qa
      config: multiple_choice
      split: validation
      args:
        num_few_shot: 0
    metrics:
    - type: mc2
      value: 42.93
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/hyperion-medium-preview
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Winogrande (5-shot)
      type: winogrande
      config: winogrande_xl
      split: validation
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 78.53
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/hyperion-medium-preview
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GSM8k (5-shot)
      type: gsm8k
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 40.49
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Locutusque/hyperion-medium-preview
      name: Open LLM Leaderboard
---

# Model Card for M4-ai/hyperion-medium-preview



## Model Details

**Model Name**: M4-ai/hyperion-medium-preview

**Base Model**: mistralai/Mistral-7B-v0.1

**Publisher**: M4-ai

**Model Type**: Question answering, conversational AI, code generation, medical text comprehension, mathematical reasoning, logical reasoning.

**Language**: English (multi-domain).

**License**: Apache-2.0

## Model Description

`M4-ai/hyperion-medium-preview` is a state-of-the-art language model fine-tuned on the Hyperion dataset for advanced reasoning across scientific domains. The model is designed to handle complex inquiries and instructions, leveraging the diverse and rich information contained in the Hyperion dataset. Its primary use cases include, but are not limited to, complex question answering, conversational understanding, code generation, medical text comprehension, mathematical reasoning, and logical reasoning.

## Intended Use

This model is intended for researchers and practitioners looking for a powerful tool to tackle challenging problems in scientific domains. It can be used in the following scenarios:

- AI-driven tutoring systems for science, medicine, mathematics, and computer science.
- Assistive tools for professionals requiring fast and accurate domain-specific information retrieval.
- Platforms that require conversational AI capabilities with a focus on technical and scientific reasoning.
- Automation of code generation and comprehension of complex programming contexts.

## Training Data

The `M4-ai/hyperion-medium-preview` model was fine-tuned on the Hyperion dataset, which amalgamates various datasets rich in diversity and complexity, including programming, medical texts, mathematical problems, and reasoning tasks.

## Evaluation Results

Coming soon...

## How to Use

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "M4-ai/hyperion-medium-preview"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Build a ChatML-style prompt for a text generation task
input_text = "<|im_start|>user\nWhat are the implications of Einstein's theory of relativity in modern physics?<|im_end|>\n<|im_start|>assistant\n"
input_ids = tokenizer.encode(input_text, return_tensors="pt")

# Generate a response; do_sample=True is required for temperature/top_p/top_k
# to take effect (without it, generate() decodes greedily and ignores them)
outputs = model.generate(
    input_ids,
    max_length=200,
    num_return_sequences=1,
    do_sample=True,
    temperature=0.8,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
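The prompt above follows the ChatML layout (`<|im_start|>role ... <|im_end|>`). As a small convenience sketch, the same string can be assembled from a message list; the `build_chatml_prompt` helper is illustrative and not part of this repository:

```python
# Illustrative helper (not part of the repo): assemble the ChatML-style
# prompt used in the example above from a list of role/content messages.
def build_chatml_prompt(messages):
    parts = [f"<|im_start|>{m['role']}\n{m['content']}<|im_end|>\n" for m in messages]
    return "".join(parts) + "<|im_start|>assistant\n"

prompt = build_chatml_prompt(
    [{"role": "user", "content": "What are the implications of Einstein's theory of relativity in modern physics?"}]
)
assert prompt == input_text  # identical to the hand-written prompt above
```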
## Known Limitations

The diversity of the dataset could lead to inconsistencies in the model's responses due to variations in data formatting and annotation quality.

## Licensing Information

This model is released under the Apache-2.0 license.

## Citation Information

If you use M4-ai/hyperion-medium-preview in your research, please cite the Hyperion dataset as follows:

```
@misc{sebastian_gabarain_2024,
  title     = {Hyperion-1: Illuminating the Path to Advanced Reasoning with a High-Quality, Multidisciplinary Question Answering Dataset},
  author    = {Sebastian Gabarain},
  publisher = {HuggingFace},
  year      = {2024},
  url       = {https://huggingface.co/datasets/Locutusque/hyperion-v1.0}
}
```

## Quants

- exl2 by bartowski: https://huggingface.co/bartowski/hyperion-medium-preview-exl2

# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)

Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_Locutusque__hyperion-medium-preview).

| Metric                            | Value |
|-----------------------------------|------:|
| Avg.                              | 61.67 |
| AI2 Reasoning Challenge (25-Shot) | 60.67 |
| HellaSwag (10-Shot)               | 83.67 |
| MMLU (5-Shot)                     | 63.73 |
| TruthfulQA (0-shot)               | 42.93 |
| Winogrande (5-shot)               | 78.53 |
| GSM8k (5-shot)                    | 40.49 |
config.json  Normal file  (26 lines)
@@ -0,0 +1,26 @@
{
  "_name_or_path": "mistralai/Mistral-7B-v0.1",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.38.1",
  "use_cache": true,
  "vocab_size": 32000
}
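The config above pins a standard Mistral-7B geometry: 32 transformer layers, hidden size 4096 with 32 query heads over 8 key/value heads (grouped-query attention), a 14336-wide gated MLP, and a 4096-token sliding attention window. A minimal sketch of reading these fields with `transformers` (repo id taken from the model card above):

```python
from transformers import AutoConfig

# Sketch: fetch the published config and derive a couple of quantities from it.
config = AutoConfig.from_pretrained("M4-ai/hyperion-medium-preview")

head_dim = config.hidden_size // config.num_attention_heads            # 4096 // 32 = 128
gqa_groups = config.num_attention_heads // config.num_key_value_heads  # 32 // 8 = 4

print(config.model_type)         # "mistral"
print(config.num_hidden_layers)  # 32
print(head_dim, gqa_groups)      # 128 4
```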
generation_config.json  Normal file  (6 lines)
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.38.1"
}
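This file pins only the BOS/EOS token ids, so decoding parameters fall back to the `transformers` defaults (greedy decoding) unless overridden at call time, as the README's `generate(...)` call does. A quick sketch of inspecting it:

```python
from transformers import GenerationConfig

# Sketch: the published generation config fixes token ids but no sampling params.
gen_config = GenerationConfig.from_pretrained("M4-ai/hyperion-medium-preview")
print(gen_config.bos_token_id, gen_config.eos_token_id)  # 1 2
print(gen_config.do_sample)  # False -> greedy decoding unless overridden
```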
model-00001-of-00008.safetensors  Normal file  (3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c511bf1f2d6cbadea9b4d23e07db1bcca49c93fd319c307212f31e88559673b7
size 1889587040

model-00002-of-00008.safetensors  Normal file  (3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3e6c001f579a12682d70737a6428362b5233ef94fe14b27e4138f01da609e1a9
size 1946243936

model-00003-of-00008.safetensors  Normal file  (3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bc9887a9f70cdfdb28b80b5d0132e41b58ce071eddd286b47b2254b127cb76db
size 1979781432

model-00004-of-00008.safetensors  Normal file  (3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bbfe05c0662fdd63a6aa51efcc0e1684ee1b81726d5877b332ac27d9c1ecdfb8
size 1946243984

model-00005-of-00008.safetensors  Normal file  (3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7ced6f9b0fa17ec6e81593d18fefa020c46a32e2249d97471c198cb5dd0bb0d2
size 1979781448

model-00006-of-00008.safetensors  Normal file  (3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d2db34e2553111c6dd5a79d759a21c88afa6f5b806900dc55f6ed864fc06132c
size 1946243984

model-00007-of-00008.safetensors  Normal file  (3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:192d96f367e805f0aee5c181cab6dfe437bbeb58586414860fbf0dc51c4c65b4
size 1979781448

model-00008-of-00008.safetensors  Normal file  (3 lines)
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fbb5db0048eecc8018040a747901468a9386dcd96362e817b6e2149d5b151907
size 815834680
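Each of the eight weight shards is checked in as a Git LFS pointer: the repository stores only the spec version, a sha256 object id, and the byte size, while the actual blob lives in LFS storage. A minimal sketch of verifying a downloaded shard against its pointer; the expected values are copied from the first pointer above, and the local path is an assumption:

```python
import hashlib

# Expected oid/size copied from the model-00001-of-00008.safetensors pointer above.
EXPECTED_OID = "c511bf1f2d6cbadea9b4d23e07db1bcca49c93fd319c307212f31e88559673b7"
EXPECTED_SIZE = 1889587040

def verify_shard(path: str) -> bool:
    """Recompute sha256 and size of a local file and compare to the LFS pointer."""
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == EXPECTED_OID and size == EXPECTED_SIZE

print(verify_shard("model-00001-of-00008.safetensors"))  # local path is assumed
```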
model.safetensors.index.json  Normal file  (298 lines)
@@ -0,0 +1,298 @@
{
  "metadata": {
    "total_size": 14483464192
  },
  "weight_map": {
    "lm_head.weight": "model-00008-of-00008.safetensors",
    "model.embed_tokens.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.input_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.input_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.10.input_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.input_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.input_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.12.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.13.input_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.input_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.input_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.input_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.17.input_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.17.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.17.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.17.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.18.input_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.input_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.2.input_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.20.input_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.input_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.21.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.22.input_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.input_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.input_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.input_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.26.input_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.26.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.26.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.26.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.26.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.27.input_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.input_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.input_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.3.input_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.3.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.30.input_layernorm.weight": "model-00008-of-00008.safetensors",
    "model.layers.30.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.30.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.30.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.30.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
    "model.layers.30.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.30.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.30.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.30.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.31.input_layernorm.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.mlp.gate_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.mlp.up_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.self_attn.k_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.self_attn.o_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.self_attn.q_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.self_attn.v_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.4.input_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.input_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.input_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.input_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.8.input_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.8.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.8.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.9.input_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
    "model.norm.weight": "model-00008-of-00008.safetensors"
  }
}
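The index maps every tensor name to the shard that holds it, and `metadata.total_size` records the combined size of the tensors themselves (slightly less than the sum of the eight shard file sizes above, since each safetensors file also carries its own header). A small sketch of resolving a tensor to its shard from a local copy of the index (local path assumed):

```python
import json
from collections import Counter

# Sketch: load a local copy of the index and resolve tensor -> shard.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

weight_map = index["weight_map"]
print(weight_map["model.layers.12.mlp.gate_proj.weight"])
# -> "model-00003-of-00008.safetensors" (per the map above)

# Count how many tensors each shard carries:
print(Counter(weight_map.values()))
```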
special_tokens_map.json  Normal file  (24 lines)
@@ -0,0 +1,24 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "</s>",
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json  Normal file  (91136 lines)
File diff suppressed because it is too large.

tokenizer.model  (Stored with Git LFS)  Normal file  (BIN)
Binary file not shown.
tokenizer_config.json  Normal file  (42 lines)
@@ -0,0 +1,42 @@
{
  "add_bos_token": true,
  "add_eos_token": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [],
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": true,
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "</s>",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "LlamaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
}
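One detail worth noting: neither tokenizer_config.json nor special_tokens_map.json registers the `<|im_start|>` / `<|im_end|>` markers used in the README prompt; only `<unk>`, `<s>`, and `</s>` appear in `added_tokens_decoder`, and `additional_special_tokens` is empty. The SentencePiece tokenizer should therefore split the ChatML markers into ordinary subword pieces, which is easy to check; the expected behavior below is an inference from these configs, not something documented in the repo:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("M4-ai/hyperion-medium-preview")

# The ChatML markers are not in added_tokens_decoder above, so they should
# tokenize as several ordinary subword pieces rather than one special token.
print(tokenizer.tokenize("<|im_start|>user"))

# Both resolve to "</s>" per the configs above (pad_token is set to the eos token).
print(tokenizer.eos_token, tokenizer.pad_token)
```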