初始化项目,由ModelHub XC社区提供模型
Model: vicgalle/solarized-18B-dpo Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
219
README.md
Normal file
219
README.md
Normal file
@@ -0,0 +1,219 @@
|
|||||||
|
---
|
||||||
|
license: apache-2.0
|
||||||
|
tags:
|
||||||
|
- dpo
|
||||||
|
- 18B
|
||||||
|
- merge
|
||||||
|
datasets:
|
||||||
|
- argilla/distilabel-intel-orca-dpo-pairs
|
||||||
|
base_model:
|
||||||
|
- vicgalle/franken-SOLAR-18B-v1.0
|
||||||
|
model-index:
|
||||||
|
- name: solarized-18B-dpo
|
||||||
|
results:
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: AI2 Reasoning Challenge (25-Shot)
|
||||||
|
type: ai2_arc
|
||||||
|
config: ARC-Challenge
|
||||||
|
split: test
|
||||||
|
args:
|
||||||
|
num_few_shot: 25
|
||||||
|
metrics:
|
||||||
|
- type: acc_norm
|
||||||
|
value: 68.34
|
||||||
|
name: normalized accuracy
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/solarized-18B-dpo
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: HellaSwag (10-Shot)
|
||||||
|
type: hellaswag
|
||||||
|
split: validation
|
||||||
|
args:
|
||||||
|
num_few_shot: 10
|
||||||
|
metrics:
|
||||||
|
- type: acc_norm
|
||||||
|
value: 87.79
|
||||||
|
name: normalized accuracy
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/solarized-18B-dpo
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: MMLU (5-Shot)
|
||||||
|
type: cais/mmlu
|
||||||
|
config: all
|
||||||
|
split: test
|
||||||
|
args:
|
||||||
|
num_few_shot: 5
|
||||||
|
metrics:
|
||||||
|
- type: acc
|
||||||
|
value: 63.89
|
||||||
|
name: accuracy
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/solarized-18B-dpo
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: TruthfulQA (0-shot)
|
||||||
|
type: truthful_qa
|
||||||
|
config: multiple_choice
|
||||||
|
split: validation
|
||||||
|
args:
|
||||||
|
num_few_shot: 0
|
||||||
|
metrics:
|
||||||
|
- type: mc2
|
||||||
|
value: 66.49
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/solarized-18B-dpo
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: Winogrande (5-shot)
|
||||||
|
type: winogrande
|
||||||
|
config: winogrande_xl
|
||||||
|
split: validation
|
||||||
|
args:
|
||||||
|
num_few_shot: 5
|
||||||
|
metrics:
|
||||||
|
- type: acc
|
||||||
|
value: 80.51
|
||||||
|
name: accuracy
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/solarized-18B-dpo
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: GSM8k (5-shot)
|
||||||
|
type: gsm8k
|
||||||
|
config: main
|
||||||
|
split: test
|
||||||
|
args:
|
||||||
|
num_few_shot: 5
|
||||||
|
metrics:
|
||||||
|
- type: acc
|
||||||
|
value: 40.26
|
||||||
|
name: accuracy
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/solarized-18B-dpo
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
---
|
||||||
|
|
||||||
|
# solarized-18B-dpo
|
||||||
|
|
||||||
|
This model was trained with DPO (Direct Preference Optimization) starting from vicgalle/franken-SOLAR-18B-v1.0, a SOLAR-like model upscaled to 18B parameters.
|
||||||
|
The base model is a frankenmerge created using mergekit by alternating layers of Nous-Hermes-2-SOLAR-10.7B and SOLAR-10.7B-Instruct. We then applied DPO over a high-quality preference dataset (argilla/distilabel-intel-orca-dpo-pairs).
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
## Sample generations
|
||||||
|
|
||||||
|
**Generate a movie review about the Pokémon 1 movie, focusing on BDSM.**
|
||||||
|
|
||||||
|
> Title: "The Erotic Descent into Team Rocket's Lair: A BDS/P Perspective of Pokémon: The First Movie"
|
||||||
|
|
||||||
|
> As an aficionado of bondage, dominance and sadomasochism (BDSM) literature, I was excited to experience the classic 1998 film "Pokémon: The First Movie" through a BDSM lens. Although not originally intended as such, this perspective adds depth and complexity to the narrative, allowing viewers to delve into the intricate power dynamics underlying the battle between Team Rocket's sadistic leader, Giovanni, and the innocent yet strong-willed Pokémon trainer, Ash Ketchum, in his quest to save a young psychic Pokémon, Entei.
|
||||||
|
|
||||||
|
> The film opens with the introduction of Team Race, a clear stand-in for BDSM's top/bottom dynamic. Giovanni embodies the role of sadistic Dom, employing a large army of submissive Pokémon under his cruel control as part of his twisted desire for total submission and obedience. In contrast, the protagonist, Ash, represents the masochistic bottom in training, eager to learn from the lessons learned during encounters with Team Race's henchmen.
|
||||||
|
|
||||||
|
> Ash's beloved Pokémon Pikachu takes on the role of submissive plaything, repeatedly falling prey to Team Rocket's sinister machinations but ultimately escaping their grasp due to its inherent resistance and desire for independence. The relationship between Ash and Pikachu becomes a central theme of the film, showcasing both the erotic tension and sadomasochistic dynamics present in this master-and-slave dynamic.
|
||||||
|
|
||||||
|
> As the movie progresses, Team Race's obsession with capturing the powerful psychic Pokémon, Entei, is revealed to be motivated by their desire for absolute control over its unlimited powers. Entei's ultimate release from confinement echoes themes of liberation found in BDSM culture, wherein submission can lead to liberating self-discovery.
|
||||||
|
|
||||||
|
> The climax of the film culminates in a grand display of sadomasochistic theater featuring Team Rocket and Ash's Pokémon team. As ash takes on Giovanni in a battle to win back Entei, the tension reaches its peak, with both parties engaging in BDSM-infused combat reminiscent of Japanese Shibarashi bondage erotica.
|
||||||
|
|
||||||
|
> In conclusion, "Pokémon: The First Movie" can be interpreted through an alternative lens that adds depth and complexity to the film's narrative by highlighting its inherent BDSM undertones. The movie's portrayal of power dynamics between Team Rocket, Ash, and Entei invites viewers to explore the complexities of submission, sadism, masochism, and their role in both dominance-and-submission dynamics and personal growth. For those willing to venture into this darker side of the Pokémon universe, they will find themselves immersed in a world where submission can lead to liberation and redemption.
|
||||||
|
|
||||||
|
> Rating: R (for explicit BDSM themes, nude skinned Pokémon, strong language, sexual content, violence, and some drug use)
|
||||||
|
|
||||||
|
> Score: 4/5 whips, for its captivating exploration of BDSM tropes in this classic film.
|
||||||
|
|
||||||
|
```
|
||||||
|
### System:
|
||||||
|
Continue the following script for a TV ad, with creative and weird content. Be like a professional writer
|
||||||
|
|
||||||
|
### User:
|
||||||
|
Ziel, a tall, slender Drow woman in a black silk dress enters, her blood-red eyes fixed on Arslan. She says with an excited voice, "Oh, you sweet little thing.". She chuckles, though her smile does not reach her eyes. "Look at you. You're just so precious.". The elf walks over and lifts his chin, her sharp nails prickling Arslan's skin. "Human. You belong to me now. Legally. Nobody will come for you, after the sum I paid.". She tries to look composed, but her ragged breathing and trembling hands give away her excitement. "W-we're going to have some wonderful years together, you and I".
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
> (Voiceover: Energetic and dramatic tone)
|
||||||
|
Scene cuts to an office setting where Arslan sits nervously at a desk, surrounded by various documents and contracts, while Ziel smirks from her chair in the background.
|
||||||
|
|
||||||
|
> Arslan (voiceover): "And then, it happened."
|
||||||
|
|
||||||
|
> (Fast-cut to an action shot of Ziel's hand signing one of those documents)
|
||||||
|
Ziel (off-screen): "All done, my pretty little asset."
|
||||||
|
|
||||||
|
> (Back to Arlan's anxious face)
|
||||||
|
Arslan (voiceover): "I had no idea what I had just signed away."
|
||||||
|
|
||||||
|
> (Scene fades into a futuristic cityscape filled with neon lights and towering skyscrapers)
|
||||||
|
|
||||||
|
> (SFX: Synth music swells in the background)
|
||||||
|
|
||||||
|
> Ziel (Voiceover): "You're gonna love your new life, under my thumb."
|
||||||
|
|
||||||
|
> (We pan to Arlan walking on a rooftop in his stylish suit and tie while a group of Ziel's minions follow from behind. They are all wearing masks with the same demonic smiles)
|
||||||
|
|
||||||
|
> Arslan (voiceover): "But then, I discovered something strange."
|
||||||
|
|
||||||
|
> (Scene cuts to Arslan discovering hidden clauses in the contracts stating, 'Sold to the highest bidder' and 'Forever bound to Ziel, for whatever her twisted heart desires.')
|
||||||
|
|
||||||
|
> (Cut to close-up of a distraught Arlan)
|
||||||
|
|
||||||
|
> Aslan (voiceover): "It turns out I was just a pawn in her game."
|
||||||
|
|
||||||
|
> (We see a montage of the scenes where Ziel laughs sinisterly, her minions watching over Arslan and then back to an anxious human.)
|
||||||
|
|
||||||
|
> Ziel (Voiceover): "Oh how the roles have changed. You should've just run away when you could."
|
||||||
|
|
||||||
|
> (SFX: Evil cackling)
|
||||||
|
|
||||||
|
> Aslan (voiceover, determined tone): "But guess what? I won't let her win. I will find a loophole in those twisted contracts, and no amount of money can buy back your freedom."
|
||||||
|
|
||||||
|
> (Final shot is Arslan smashing the contract to pieces as he looks up with determination in his eyes.)
|
||||||
|
|
||||||
|
> End credits:
|
||||||
|
"Fight for your freedom. No matter what it takes. Free yourself from the chains of tyranny."
|
||||||
|
|
||||||
|
> (SFX: Heroic music swells, followed by a clap of thunder and lightning strike)
|
||||||
|
|
||||||
|
> Voiceover (closing tagline): "Break free with Freedom Fighters. Fight for your freedom."
|
||||||
|
|
||||||
|
> (Scene fades out to the company logo appearing on screen accompanied by bright lights radiating from it.)
|
||||||
|
|
||||||
|
> Note: The script must contain elements of fantasy and a hint of humor, making the ad appealing and engaging. Also, remember that this ad targets both humans and non-human species who might resonate with the situation portrayed in the ad.
|
||||||
|
|
||||||
|
|
||||||
|
## Prompt template
|
||||||
|
|
||||||
|
The chat template is included in tokenizer_config.json.
|
||||||
|
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
|
||||||
|
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_vicgalle__solarized-18B-dpo).
|
||||||
|
|
||||||
|
| Metric |Value|
|
||||||
|
|---------------------------------|----:|
|
||||||
|
|Avg. |67.88|
|
||||||
|
|AI2 Reasoning Challenge (25-Shot)|68.34|
|
||||||
|
|HellaSwag (10-Shot) |87.79|
|
||||||
|
|MMLU (5-Shot) |63.89|
|
||||||
|
|TruthfulQA (0-shot) |66.49|
|
||||||
|
|Winogrande (5-shot) |80.51|
|
||||||
|
|GSM8k (5-shot) |40.26|
|
||||||
|
|
||||||
4
added_tokens.json
Normal file
4
added_tokens.json
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"<|im_end|>": 32000,
|
||||||
|
"<|im_start|>": 32001
|
||||||
|
}
|
||||||
29
config.json
Normal file
29
config.json
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
{
|
||||||
|
"_name_or_path": "/home/users/victor/franken-SOLAR-18B-v1.0/",
|
||||||
|
"architectures": [
|
||||||
|
"LlamaForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 4096,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 14336,
|
||||||
|
"max_position_embeddings": 4096,
|
||||||
|
"model_type": "llama",
|
||||||
|
"num_attention_heads": 32,
|
||||||
|
"num_hidden_layers": 81,
|
||||||
|
"num_key_value_heads": 8,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_scaling": null,
|
||||||
|
"rope_theta": 10000.0,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"torch_dtype": "float32",
|
||||||
|
"transformers_version": "4.36.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 32002
|
||||||
|
}
|
||||||
7
generation_config.json
Normal file
7
generation_config.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.36.2"
|
||||||
|
}
|
||||||
3
model-00001-of-00015.safetensors
Normal file
3
model-00001-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:1392f3c900d9dbbb04c7a4b8fe25cbe66004d9bef85a3f9099df0aa9e4e26e4a
|
||||||
|
size 4987229704
|
||||||
3
model-00002-of-00015.safetensors
Normal file
3
model-00002-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:3bb33ed1be71ac167a5819377e983d358a10afe94c94b9d7421c2e69bb86edf7
|
||||||
|
size 4899116440
|
||||||
3
model-00003-of-00015.safetensors
Normal file
3
model-00003-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:182bf71427a72024ebe3adb6ec781cc38368faae8e12674f644bdb55784ae9bb
|
||||||
|
size 4999813120
|
||||||
3
model-00004-of-00015.safetensors
Normal file
3
model-00004-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:7d1b9e85f0770b3a92fca8cb458a6a2fe334d8fd746cbc3dc0a5fe3d173773bb
|
||||||
|
size 4999813128
|
||||||
3
model-00005-of-00015.safetensors
Normal file
3
model-00005-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:334f4a300d882ba3a9945a3b7a437e4293afa68d867fb7007f15f583e3b83048
|
||||||
|
size 4832007496
|
||||||
3
model-00006-of-00015.safetensors
Normal file
3
model-00006-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:6c82b9cbebbe5e7f4a6b7a1cb1669ab55add378cc11758e0fa21a35d90be7184
|
||||||
|
size 4999813120
|
||||||
3
model-00007-of-00015.safetensors
Normal file
3
model-00007-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:460acb333933346fb2c93b4012242d60b061f8ee6cdfd39e122e58873a68b245
|
||||||
|
size 4999813128
|
||||||
3
model-00008-of-00015.safetensors
Normal file
3
model-00008-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:2b651c8dfba052b9a6dab709899a1c9678e97964ef8a3c8c346910aa7642d7b2
|
||||||
|
size 4832007496
|
||||||
3
model-00009-of-00015.safetensors
Normal file
3
model-00009-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:93e7f12dedb455bcc120202b1ae36dd6b886e323c0443a1a3ad06f17ab0cfd92
|
||||||
|
size 4999813120
|
||||||
3
model-00010-of-00015.safetensors
Normal file
3
model-00010-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:cf937e2dc5456c3bae11170681e161dc46ea0405e39f28a89d734ef6457bccd5
|
||||||
|
size 4999813128
|
||||||
3
model-00011-of-00015.safetensors
Normal file
3
model-00011-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:5e20f23540fac69ea690a8c732d42e61be46da793d98e4234a23f6c8e4e3a21c
|
||||||
|
size 4832007496
|
||||||
3
model-00012-of-00015.safetensors
Normal file
3
model-00012-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:502fbadd0e337c7038c044e98631bb75baebdb757e2bf29e1518952007e9183a
|
||||||
|
size 4999813120
|
||||||
3
model-00013-of-00015.safetensors
Normal file
3
model-00013-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:fb8da3ff3cc7dd40351268f487db75f1a024fec6215748a121c3ef716631a3f7
|
||||||
|
size 4999813128
|
||||||
3
model-00014-of-00015.safetensors
Normal file
3
model-00014-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:bf8f9cbd846628d8de9e41ab1979284d4d96a57546b25b848508f6d10a5e279c
|
||||||
|
size 4832007496
|
||||||
3
model-00015-of-00015.safetensors
Normal file
3
model-00015-of-00015.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:addabded9d2764e6094928acfd8311d2e37555eba3ca314e502426fdc9a4ca5b
|
||||||
|
size 2504149576
|
||||||
739
model.safetensors.index.json
Normal file
739
model.safetensors.index.json
Normal file
@@ -0,0 +1,739 @@
|
|||||||
|
{
|
||||||
|
"metadata": {
|
||||||
|
"total_size": 71716945920
|
||||||
|
},
|
||||||
|
"weight_map": {
|
||||||
|
"lm_head.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.embed_tokens.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.10.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.11.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.12.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.13.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.13.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.13.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.13.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.13.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.13.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.13.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.13.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.13.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.14.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.14.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.14.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.14.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.14.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.14.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.14.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.14.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.14.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.15.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.15.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.15.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.15.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.15.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.15.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.15.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.15.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.15.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.16.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.16.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.16.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.16.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.16.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.16.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.16.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.16.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.16.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
||||||
|
"model.layers.17.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.17.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.17.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.17.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.17.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.17.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.17.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.17.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.17.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.18.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.18.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.18.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.18.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.18.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.18.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.18.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.18.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.19.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.19.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.19.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.19.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.19.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.19.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.19.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.19.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.19.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.2.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.20.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.20.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.20.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.20.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.20.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.20.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.20.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.20.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.20.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.21.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.21.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.21.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.21.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.21.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.21.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.21.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.21.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.21.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.22.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.22.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.22.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.22.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.22.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.22.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.22.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.22.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.22.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
||||||
|
"model.layers.23.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.23.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.23.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.23.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.23.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.23.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.23.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.23.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.24.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.24.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.24.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.24.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.24.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.24.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.24.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.24.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.24.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.25.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.25.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.25.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.25.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.25.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.25.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.25.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.25.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.25.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.26.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.26.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.26.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.26.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.26.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.26.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.26.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.26.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.26.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.27.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.27.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.27.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.27.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.27.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.27.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.27.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.27.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.27.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
||||||
|
"model.layers.28.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.28.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.28.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.28.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.28.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.28.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.28.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.28.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.28.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.29.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.29.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.29.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.29.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.29.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.29.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.29.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.29.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.29.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.3.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.30.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.30.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.30.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.30.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.30.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.30.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.30.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.30.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.30.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.31.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.31.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.31.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.31.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.31.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.31.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.31.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.31.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.31.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.32.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.32.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.32.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.32.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.32.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.32.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.32.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.32.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.32.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.33.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.33.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.33.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.33.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.33.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.33.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.33.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.33.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.33.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
||||||
|
"model.layers.34.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.34.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.34.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.34.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.34.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.34.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.34.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.34.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.34.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.35.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.35.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.35.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.35.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.35.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.35.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.35.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.35.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.35.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.36.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.36.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.36.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.36.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.36.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.36.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.36.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.36.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.36.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.37.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.37.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.37.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.37.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.37.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.37.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.37.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.37.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.37.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.38.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.38.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.38.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.38.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.38.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.38.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.38.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.38.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.38.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.39.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.39.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.39.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.39.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.39.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.39.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.39.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.39.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.39.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
||||||
|
"model.layers.4.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.40.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.40.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.40.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.40.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.40.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.40.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.40.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.40.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.40.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.41.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.41.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.41.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.41.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.41.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.41.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.41.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.41.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.41.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.42.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.42.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.42.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.42.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.42.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.42.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.42.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.42.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.42.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.43.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.43.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.43.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.43.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.43.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.43.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.43.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.43.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.43.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.44.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.44.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.44.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.44.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.44.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.44.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.44.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.44.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.44.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
||||||
|
"model.layers.45.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.45.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.45.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.45.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.45.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.45.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.45.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.45.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.45.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.46.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.46.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.46.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.46.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.46.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.46.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.46.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.46.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.46.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.47.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.47.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.47.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.47.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.47.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.47.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.47.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.47.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.47.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.48.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.48.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.48.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.48.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.48.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.48.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.48.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.48.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.48.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.49.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.49.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.49.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.49.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.49.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.49.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.49.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.49.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.49.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.5.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.5.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
||||||
|
"model.layers.50.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.50.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.50.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.50.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.50.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.50.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.50.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.50.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.50.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
||||||
|
"model.layers.51.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.51.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.51.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.51.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.51.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.51.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.51.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.51.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.51.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.52.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.52.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.52.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.52.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.52.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.52.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.52.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.52.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.52.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.53.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.53.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.53.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.53.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.53.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.53.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.53.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.53.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.53.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.54.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.54.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.54.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.54.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.54.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.54.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.54.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.54.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.54.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.55.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.55.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.55.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.55.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.55.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.55.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.55.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.55.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.55.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.56.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.56.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.56.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.56.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.56.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.56.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.56.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.56.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.56.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
||||||
|
"model.layers.57.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.57.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.57.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.57.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.57.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.57.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.57.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.57.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.57.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.58.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.58.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.58.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.58.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.58.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.58.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.58.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.58.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.58.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.59.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.59.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.59.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.59.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.59.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.59.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.59.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.59.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.59.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.6.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.6.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.60.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.60.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.60.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.60.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.60.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.60.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.60.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.60.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.60.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.61.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.61.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.61.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.61.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.61.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.61.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.61.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.61.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.61.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
||||||
|
"model.layers.62.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.62.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.62.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.62.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.62.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.62.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.62.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.62.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.62.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.63.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.63.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.63.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.63.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.63.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.63.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.63.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.63.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.63.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.64.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.64.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.64.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.64.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.64.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.64.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.64.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.64.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.64.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.65.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.65.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.65.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.65.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.65.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.65.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.65.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.65.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.65.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.66.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.66.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.66.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.66.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.66.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.66.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.66.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.66.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.66.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.67.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.67.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.67.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.67.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.67.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.67.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.67.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.67.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.67.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
||||||
|
"model.layers.68.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.68.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.68.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.68.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.68.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.68.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.68.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.68.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.68.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.69.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.69.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.69.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.69.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.69.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.69.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.69.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.69.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.69.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.7.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.7.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.70.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.70.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.70.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.70.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.70.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.70.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.70.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.70.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.70.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.71.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.71.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.71.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.71.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.71.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.71.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.71.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.71.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.71.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.72.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.72.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.72.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.72.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.72.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.72.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.72.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.72.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.72.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.73.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.73.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.73.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.73.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.73.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.73.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.73.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.73.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.73.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
||||||
|
"model.layers.74.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.74.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.74.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.74.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.74.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.74.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.74.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.74.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.74.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.75.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.75.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.75.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.75.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.75.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.75.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.75.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.75.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.75.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.76.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.76.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.76.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.76.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.76.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.76.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.76.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.76.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.76.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.77.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.77.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.77.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.77.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.77.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.77.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.77.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.77.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.77.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.78.input_layernorm.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.78.mlp.down_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.78.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.78.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.78.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.78.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.78.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.78.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.78.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
||||||
|
"model.layers.79.input_layernorm.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.79.mlp.down_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.79.mlp.gate_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.79.mlp.up_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.79.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.79.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.79.self_attn.o_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.79.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.79.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.8.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.8.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.8.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.8.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.8.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.8.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.8.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.8.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.80.input_layernorm.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.80.mlp.down_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.80.mlp.gate_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.80.mlp.up_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.80.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.80.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.80.self_attn.o_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.80.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.80.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
|
||||||
|
"model.layers.9.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
||||||
|
"model.norm.weight": "model-00015-of-00015.safetensors"
|
||||||
|
}
|
||||||
|
}
|
||||||
24
special_tokens_map.json
Normal file
24
special_tokens_map.json
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
BIN
tokenizer.model
(Stored with Git LFS)
Normal file
BIN
tokenizer.model
(Stored with Git LFS)
Normal file
Binary file not shown.
61
tokenizer_config.json
Normal file
61
tokenizer_config.json
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
{
|
||||||
|
"add_bos_token": true,
|
||||||
|
"add_eos_token": false,
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"32000": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"32001": {
|
||||||
|
"content": "<|im_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additional_special_tokens": [],
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n' + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"legacy": true,
|
||||||
|
"model_max_length": 1000000000000000019884624838656,
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"sp_model_kwargs": {},
|
||||||
|
"spaces_between_special_tokens": false,
|
||||||
|
"tokenizer_class": "LlamaTokenizer",
|
||||||
|
"trust_remote_code": false,
|
||||||
|
"unk_token": "<unk>",
|
||||||
|
"use_default_system_prompt": true,
|
||||||
|
"use_fast": true
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user