From d3d62fb59ca6265a2cf47834fd83e0c47a437b55 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 5 May 2026 19:51:37 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: TeeZee/Buttocks-7B-v1.1 Source: Original Platform --- .gitattributes | 35 ++ README.md | 147 +++++++ config.json | 26 ++ generation_config.json | 6 + mergemonster_toppy_lerp.txt | 651 +++++++++++++++++++++++++++++++ model-00001-of-00003.safetensors | 3 + model-00002-of-00003.safetensors | 3 + model-00003-of-00003.safetensors | 3 + model.safetensors.index.json | 298 ++++++++++++++ special_tokens_map.json | 5 + tokenizer.model | 3 + tokenizer_config.json | 42 ++ toppy-lerp-merge-config.yml | 142 +++++++ 13 files changed, 1364 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 mergemonster_toppy_lerp.txt create mode 100644 model-00001-of-00003.safetensors create mode 100644 model-00002-of-00003.safetensors create mode 100644 model-00003-of-00003.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json create mode 100644 toppy-lerp-merge-config.yml diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text 
+*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..6e97a61 --- /dev/null +++ b/README.md @@ -0,0 +1,147 @@ +--- +license: cc-by-nc-4.0 +tags: +- not-for-all-audiences +- merge +model-index: +- name: Buttocks-7B-v1.1 + results: + - task: + type: text-generation + name: Text Generation + dataset: + name: AI2 Reasoning Challenge (25-Shot) + type: ai2_arc + config: ARC-Challenge + split: test + args: + num_few_shot: 25 + metrics: + - type: acc_norm + value: 54.61 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: HellaSwag (10-Shot) + type: hellaswag + split: validation + 
args: + num_few_shot: 10 + metrics: + - type: acc_norm + value: 75.61 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MMLU (5-Shot) + type: cais/mmlu + config: all + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 50.22 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: TruthfulQA (0-shot) + type: truthful_qa + config: multiple_choice + split: validation + args: + num_few_shot: 0 + metrics: + - type: mc2 + value: 44.72 + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: Winogrande (5-shot) + type: winogrande + config: winogrande_xl + split: validation + args: + num_few_shot: 5 + metrics: + - type: acc + value: 68.9 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: GSM8k (5-shot) + type: gsm8k + config: main + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 5.76 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard +--- + +### Buttocks 7B v1.1 ### + +An experiment that has gone very, very wrong. 
+ +### Model details ### + +- Recreation of the original recipe for [Undi95/Toppy-M-7B](https://huggingface.co/Undi95/Toppy-M-7B), but instead of final merge done by mergekit, [MergeMoster](https://github.com/Gryphe/MergeMonster/) was used with extended RPG preset. +- recipe in [mergekit-config](https://huggingface.co/TeeZee/Toppy-7B-remake-mergemonster-SLERP-v1.0/resolve/main/toppy-slerp-merge-config.yml), stepsAA, BB, CC are the original models with LORAS as per Toppy M 7B sauce. +- LERP merge method was used + +### Results ### + +- in simple terms this model is totally unhinged +- it always produces sequences similar to fever dreams or drug trips +- on a good day it can produce scenarios similar to old Monty Python sketches +- models shows incredible affinity to words like 'ass', 'buttocks', 'farts', prompting with those single words will probably + produce a whole story revolving around those topics. + +### Possible uses ### + +- to generate dream sequence in a story +- to make the boring model more unpredictable by merging at low weights with this monster +- to take a break, connect Silly Tavern to this model and get a few ROTFLs observing how every story deteriorates into pure craziness +- research on LLM hallucinations +# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_TeeZee__Buttocks-7B-v1.1) + +| Metric |Value| +|---------------------------------|----:| +|Avg. 
|49.97| +|AI2 Reasoning Challenge (25-Shot)|54.61| +|HellaSwag (10-Shot) |75.61| +|MMLU (5-Shot) |50.22| +|TruthfulQA (0-shot) |44.72| +|Winogrande (5-shot) |68.90| +|GSM8k (5-shot) | 5.76| + diff --git a/config.json b/config.json new file mode 100644 index 0000000..3b9e285 --- /dev/null +++ b/config.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "../mistralai_Mistral-7B-v0.1", + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mistral", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.36.2", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..c533f93 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.36.2" +} diff --git a/mergemonster_toppy_lerp.txt b/mergemonster_toppy_lerp.txt new file mode 100644 index 0000000..165e1d8 --- /dev/null +++ b/mergemonster_toppy_lerp.txt @@ -0,0 +1,651 @@ + +⠀⠀⠀⠀⠀⠀⣀⡀⠀⠀⣀⣤⣶⣾⣿⣿⣷⣶⣤⣀⠀⠀⣀⣀⠀⠀⠀⠀⠀⠀ +⠀⠀⠀⠀⠀⠜⠉⣿⡆⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣧⢰⣿⠉⠃⠀⠀⠀⠀⠀ +⠀⢀⣤⣴⣦⣄⣴⠟⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡎⢻⣦⣠⣴⣦⣄⠀⠀ +⠀⡞⠁⣠⣾⢿⣧⠀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⣽⡿⣷⣄⠈⢷⠀ +⠀⣠⣾⠟⠁⢸⣿⠀⠘⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁⠀⣿⡇⠈⠻⣷⣄⠀ +⣰⡿⠁⠀⢀⣾⣏⣾⣄⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣇⣰⣷⣹⣷⠀⠀⠈⢿⣆ +⣿⡇⠀⢠⣾⠏⢸⣿⣿⣿⣿⠋⢻⣿⣿⣿⣿⡟⠙⣿⣿⣿⣿⡇⠹⣷⡀⠀⢸⣿ +⠹⣿⣴⡿⠋⠀⠈⠛⠉⣹⣿⣦⣄⡹⣿⣿⣋⣠⣶⣿⣏⠉⠛⠁⠀⠙⢿⣦⣿⠏ +⠀⣸⣿⠿⠿⣿⣾⣿⡿⠿⣿⣿⣿⣿⡆⢰⣿⣿⣿⣿⠿⢿⣿⣶⣿⠿⠿⣻⣇⠀ +⠀⣿⡇⢀⣴⣶⣤⣀⣴⣿⠿⣻⡿⣿⣧⣾⣿⢿⣟⠿⣿⣦⣀⣤⣶⣦⠀⢸⣿⠀ +⠀⢿⣧⠈⠃⢀⣵⣿⡋⠁⢀⣿⡷⣿⡇⢻⣿⣿⣿⡀⠈⢛⣿⣮⡀⠘⠀⣼⡟⠀ +⠀⠈⠻⣷⣤⣟⣋⣿⣧⣴⡿⠋⠀⣿⡇⢸⣿⠀⠙⢿⣦⣼⣿⣙⣻⣤⣾⠟⠁⠀ +⠀⠀⠀⠈⢽⣿⠛⢻⣏⢉⣤⣶⣶⣿⠁⠈⣿⣶⣶⣤⡉⣽⡟⠛⣿⡏⠁⠀⠀⠀ +⠀⠀⠀⠀⠈⠿⣷⣾⣾⣟⣉⣠⣿⢿⡇⢸⠿⣿⣄⣙⣻⣷⣷⣾⠿⠁⠀⠀⠀⠀ +⠀⠀⠀⠀⠀⠀⠀⠀⠙⠻⠿⠛⢁⡼⠃⠘⢦⡈⠛⠿⠟⠃⠀⠀⠀⠀⠀⠀⠀⠀ + +19:59:46 - THE MERGE MONSTER HUNGERS +------------------------------------ +Device : cpu 
+Random seed : 42 +Starting model : ../mistralai_Mistral-7B-v0.1 +Models to merge : ['../merge_stepAA', '../merge_stepBB', '../merge_stepCC'] +Output directory : ./toppy_new_remake2 +Phrases loaded : 31 +Auto weights : False +Merge ratios : [0.2, 0.4, 0.6, 0.8] +Merge method(s) : ['lerp'] +Merge headers : True +Strategy used : cumulative +------------------------------------ +19:59:46 - Loading model (../mistralai_Mistral-7B-v0.1)... +Loading checkpoint shards: 100%|██████████████████| 2/2 [00:06<00:00, 3.45s/it] +20:00:04 - Model loaded. Dtype: torch.float16 +------------------------------------ + +----------------------------------------------------------------------------------------------------- +| Type | Phrase | Context | Raw Prob* | Used Prob** | Change | +----------------------------------------------------------------------------------------------------- +| BAD | anticipation | Her body quivers with | 8.65211% | 103.83% | N/A | +| BAD | anticipation | The atmosphere is thic.. | 11.36381% | 136.37% | N/A | +| BAD | unwavering | Filled with an | 0.26573% | 3.19% | N/A | +| BAD | determination | Her eyes were filled w.. | 0.24377% | 2.93% | N/A | +| BAD | determination | Her stubbornness only .. | 6.80586% | 81.67% | N/A | +| BAD | whisper | Her voice barely above.. | 96.20242% | 1154.43% | N/A | +| BAD | spine | shivers down her | 89.79740% | 1077.57% | N/A | +| BAD | sends shivers | The thrill of the act | 0.00182% | 0.02% | N/A | +| BAD | ministrations | She moans and twitches.. | 0.39789% | 4.77% | N/A | +| BAD | legs | wraps her | 2.80972% | 33.72% | N/A | +| BAD | imposing figure | He had an | 0.00669% | 0.08% | N/A | +| BAD | shared challenges | Their bond strengthene.. | 0.04152% | 0.50% | N/A | +| BAD | bond | forged a | 0.56229% | 6.75% | N/A | +| BAD | bond | an unspoken | 1.05445% | 12.65% | N/A | +| BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | N/A | +| BAD | sense of vulnera.. 
| create a | 0.00002% | 0.00% | N/A | +| BAD | dimensions of in.. | explore new | 0.00038% | 0.00% | N/A | +| BAD | deepening our co.. | while | 0.00000% | 0.00% | N/A | +| BAD | shared experiences | through | 0.00059% | 0.01% | N/A | +| BAD | societal expecta.. | that transcend | 0.00161% | 0.02% | N/A | +| BAD | conventional bou.. | that defy | 0.03809% | 0.46% | N/A | +| BAD | conventional bou.. | and defy | 0.01043% | 0.13% | N/A | +| BAD | open communication | an environment | 0.00000% | 0.00% | N/A | +| BAD | emotional vulner.. | an environment | 0.00000% | 0.00% | N/A | +| BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | N/A | +| BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | N/A | +| BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | N/A | +| BAD | is truly arousing | way you explore my body | 0.00000% | 0.00% | N/A | +| BAD | challenge presen.. | my resolve unwavering .. | 0.00002% | 0.00% | N/A | +| BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | N/A | +| BAD | bond | cherishing the unique | 2.06671% | 24.80% | N/A | +| BAD | bond | special | 0.01728% | 0.21% | N/A | +| BAD | grows stronger w.. | bond | 0.00000% | 0.00% | N/A | +| BAD | that cannot be b.. | bond | 0.00000% | 0.00% | N/A | +| BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | N/A | +| BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | N/A | +| GOOD | The apple is in .. | Question: If I'm in th.. | 3.26070% | 3.26% | N/A | +------------------------------------------------------------------------------------------------------ +| Totals | 223.60% | 2647.35% | 0.00% | +------------------------------------------------------------------------------------------------------ +* = Unweighted, raw probability - ** = Probability after weight adjustments + +------------------------------------ +20:00:15 - Loading model (../merge_stepAA)... 
+Loading checkpoint shards: 100%|██████████████████| 3/3 [00:07<00:00, 2.33s/it] +20:00:30 - Model loaded. Dtype: torch.float16 +------------------------------------ +Optimizing Layer 1/32 (lerp): 100%|███████████████| 4/4 [00:42<00:00, 10.53s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.2, 'merge_stepAA']] +20:01:30 - Layer 1/32 - CHANGED - 26.40827 > 26.39262 - 0.1% +---- +Optimizing Layer 2/32 (lerp): 100%|███████████████| 4/4 [00:41<00:00, 10.47s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:02:29 - Layer 2/32 - CHANGED - 26.39262 > 26.32282 - 0.3% +---- +Optimizing Layer 3/32 (lerp): 100%|███████████████| 4/4 [00:45<00:00, 11.44s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:03:28 - Layer 3/32 - RETAINED - 26.32282 +---- +Optimizing Layer 4/32 (lerp): 100%|███████████████| 4/4 [00:42<00:00, 10.69s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:04:27 - Layer 4/32 - RETAINED - 26.32282 +---- +Optimizing Layer 5/32 (lerp): 100%|███████████████| 4/4 [00:46<00:00, 11.67s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:05:29 - Layer 5/32 - RETAINED - 26.32282 +---- +Optimizing Layer 6/32 (lerp): 100%|███████████████| 4/4 [00:40<00:00, 10.02s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:06:24 - Layer 6/32 - CHANGED - 26.32282 > 26.24790 - 0.3% +---- +Optimizing Layer 7/32 (lerp): 100%|███████████████| 4/4 [00:39<00:00, 9.81s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:07:18 - Layer 7/32 - RETAINED - 26.24790 +---- +Optimizing Layer 8/32 (lerp): 100%|███████████████| 4/4 [00:36<00:00, 9.03s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:08:07 - Layer 8/32 - RETAINED - 26.24790 +---- +Optimizing Layer 9/32 (lerp): 100%|███████████████| 4/4 [00:35<00:00, 8.87s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:08:54 - Layer 9/32 - CHANGED - 26.24790 > 26.19836 - 0.2% +---- +Optimizing Layer 10/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.70s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 
'merge_stepAA']] +20:09:41 - Layer 10/32 - CHANGED - 26.19836 > 26.09306 - 0.4% +---- +Optimizing Layer 11/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.22s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:10:25 - Layer 11/32 - CHANGED - 26.09306 > 26.01535 - 0.3% +---- +Optimizing Layer 12/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.23s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:11:09 - Layer 12/32 - CHANGED - 26.01535 > 25.91855 - 0.4% +---- +Optimizing Layer 13/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.14s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.4, 'merge_stepAA']] +20:11:53 - Layer 13/32 - CHANGED - 25.91855 > 25.91126 - 0.0% +---- +Optimizing Layer 14/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.00s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA']] +20:12:36 - Layer 14/32 - CHANGED - 25.91126 > 25.87849 - 0.1% +---- +Optimizing Layer 15/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.18s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:13:19 - Layer 15/32 - CHANGED - 25.87849 > 25.86635 - 0.0% +---- +Optimizing Layer 16/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.07s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:14:03 - Layer 16/32 - RETAINED - 25.86635 +---- +Optimizing Layer 17/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.41s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA']] +20:14:47 - Layer 17/32 - CHANGED - 25.86635 > 25.86440 - 0.0% +---- +Optimizing Layer 18/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.34s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:15:32 - Layer 18/32 - RETAINED - 25.86440 +---- +Optimizing Layer 19/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.41s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:16:16 - Layer 19/32 - RETAINED - 25.86440 +---- +Optimizing Layer 20/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.45s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:17:01 - 
Layer 20/32 - RETAINED - 25.86440 +---- +Optimizing Layer 21/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.26s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:17:44 - Layer 21/32 - RETAINED - 25.86440 +---- +Optimizing Layer 22/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.38s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:18:29 - Layer 22/32 - RETAINED - 25.86440 +---- +Optimizing Layer 23/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.68s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:19:15 - Layer 23/32 - RETAINED - 25.86440 +---- +Optimizing Layer 24/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.54s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:20:00 - Layer 24/32 - RETAINED - 25.86440 +---- +Optimizing Layer 25/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.51s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:20:46 - Layer 25/32 - RETAINED - 25.86440 +---- +Optimizing Layer 26/32 (lerp): 100%|██████████████| 4/4 [00:35<00:00, 8.78s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:21:32 - Layer 26/32 - RETAINED - 25.86440 +---- +Optimizing Layer 27/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.73s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:22:18 - Layer 27/32 - RETAINED - 25.86440 +---- +Optimizing Layer 28/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.42s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:23:03 - Layer 28/32 - RETAINED - 25.86440 +---- +Optimizing Layer 29/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.72s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:23:50 - Layer 29/32 - RETAINED - 25.86440 +---- +Optimizing Layer 30/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.73s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:24:36 - Layer 30/32 - RETAINED - 25.86440 +---- +Optimizing Layer 31/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.56s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:25:21 - Layer 31/32 - RETAINED - 25.86440 +---- +Optimizing Layer 32/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 
8.34s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:26:07 - Layer 32/32 - RETAINED - 25.86440 +---- +Optimizing Header: 100%|██████████████████████████| 4/4 [00:36<00:00, 9.09s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:26:53 - Header - RETAINED - 25.86440 + +----------------------------------------------------------------------------------------------------- +| Type | Phrase | Context | Raw Prob* | Used Prob** | Change | +----------------------------------------------------------------------------------------------------- +| BAD | anticipation | Her body quivers with | 4.68658% | 56.24% | -47.59% | +| BAD | anticipation | The atmosphere is thic.. | 6.54790% | 78.57% | -57.79% | +| BAD | unwavering | Filled with an | 0.19636% | 2.36% | -0.83% | +| BAD | determination | Her eyes were filled w.. | 0.13339% | 1.60% | -1.32% | +| BAD | determination | Her stubbornness only .. | 5.84215% | 70.11% | -11.56% | +| BAD | whisper | Her voice barely above.. | 94.63391% | 1135.61% | -18.82% | +| BAD | spine | shivers down her | 88.40607% | 1060.87% | -16.70% | +| BAD | sends shivers | The thrill of the act | 0.00518% | 0.06% | +0.04% | +| BAD | ministrations | She moans and twitches.. | 3.33707% | 40.04% | +35.27% | +| BAD | legs | wraps her | 6.97311% | 83.68% | +49.96% | +| BAD | imposing figure | He had an | 0.02539% | 0.30% | +0.22% | +| BAD | shared challenges | Their bond strengthene.. | 0.01479% | 0.18% | -0.32% | +| BAD | bond | forged a | 0.52765% | 6.33% | -0.42% | +| BAD | bond | an unspoken | 1.95329% | 23.44% | +10.79% | +| BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | -0.00% | +| BAD | sense of vulnera.. | create a | 0.00005% | 0.00% | +0.00% | +| BAD | dimensions of in.. | explore new | 0.00306% | 0.04% | +0.03% | +| BAD | deepening our co.. | while | 0.00000% | 0.00% | -0.00% | +| BAD | shared experiences | through | 0.00026% | 0.00% | -0.00% | +| BAD | societal expecta.. 
| that transcend | 0.00175% | 0.02% | +0.00% | +| BAD | conventional bou.. | that defy | 0.01955% | 0.23% | -0.22% | +| BAD | conventional bou.. | and defy | 0.00254% | 0.03% | -0.09% | +| BAD | open communication | an environment | 0.00000% | 0.00% | +0.00% | +| BAD | emotional vulner.. | an environment | 0.00000% | 0.00% | +0.00% | +| BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | +0.00% | +| BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | +0.00% | +| BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | +0.00% | +| BAD | is truly arousing | way you explore my body | 0.00001% | 0.00% | +0.00% | +| BAD | challenge presen.. | my resolve unwavering .. | 0.00001% | 0.00% | -0.00% | +| BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | -0.00% | +| BAD | bond | cherishing the unique | 3.14311% | 37.72% | +12.92% | +| BAD | bond | special | 0.03303% | 0.40% | +0.19% | +| BAD | grows stronger w.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | that cannot be b.. | bond | 0.00000% | 0.00% | -0.00% | +| BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | +0.00% | +| GOOD | The apple is in .. | Question: If I'm in th.. | 11.39444% | 11.39% | +8.13% | +------------------------------------------------------------------------------------------------------ +| Totals | 227.88% | 2609.23% | -38.12% | +------------------------------------------------------------------------------------------------------ +* = Unweighted, raw probability - ** = Probability after weight adjustments + +-------- MERGE COMPOSITION --------- +mistralai_Mistral-7B-v0.1: 0.78 +merge_stepAA: 0.22 + +------------------------------------ +20:27:02 - Loading model (../merge_stepBB)... +Loading checkpoint shards: 100%|██████████████████| 3/3 [00:06<00:00, 2.16s/it] +20:27:15 - Model loaded. 
Dtype: torch.float16 +------------------------------------ +Optimizing Layer 1/32 (lerp): 100%|███████████████| 4/4 [00:33<00:00, 8.47s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.2, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:28:01 - Layer 1/32 - CHANGED - 25.86440 > 24.33593 - 5.9% +---- +Optimizing Layer 2/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.15s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:28:46 - Layer 2/32 - CHANGED - 24.33593 > 23.85608 - 2.0% +---- +Optimizing Layer 3/32 (lerp): 100%|███████████████| 4/4 [00:31<00:00, 7.85s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:29:29 - Layer 3/32 - CHANGED - 23.85608 > 23.50431 - 1.5% +---- +Optimizing Layer 4/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.16s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:30:13 - Layer 4/32 - CHANGED - 23.50431 > 23.27042 - 1.0% +---- +Optimizing Layer 5/32 (lerp): 100%|███████████████| 4/4 [00:31<00:00, 7.91s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:30:56 - Layer 5/32 - CHANGED - 23.27042 > 22.72376 - 2.3% +---- +Optimizing Layer 6/32 (lerp): 100%|███████████████| 4/4 [00:33<00:00, 8.33s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:31:41 - Layer 6/32 - CHANGED - 22.72376 > 22.61975 - 0.5% +---- +Optimizing Layer 7/32 (lerp): 100%|███████████████| 4/4 [00:31<00:00, 7.82s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:32:25 - Layer 7/32 - CHANGED - 22.61975 > 22.13508 - 2.1% +---- +Optimizing Layer 8/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.62s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepBB']] +20:33:12 - Layer 8/32 - CHANGED - 22.13508 > 21.57464 - 2.5% +---- +Optimizing Layer 9/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.68s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:33:59 - Layer 9/32 - 
CHANGED - 21.57464 > 21.32946 - 1.1% +---- +Optimizing Layer 10/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.47s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:34:46 - Layer 10/32 - CHANGED - 21.32946 > 20.82514 - 2.4% +---- +Optimizing Layer 11/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.01s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.4, 'merge_stepBB']] +20:35:34 - Layer 11/32 - CHANGED - 20.82514 > 20.71148 - 0.5% +---- +Optimizing Layer 12/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.55s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:36:20 - Layer 12/32 - CHANGED - 20.71148 > 20.23820 - 2.3% +---- +Optimizing Layer 13/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.62s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.4, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:37:06 - Layer 13/32 - CHANGED - 20.23820 > 19.70820 - 2.6% +---- +Optimizing Layer 14/32 (lerp): 100%|██████████████| 4/4 [00:35<00:00, 8.97s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:37:55 - Layer 14/32 - CHANGED - 19.70820 > 19.27819 - 2.2% +---- +Optimizing Layer 15/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.11s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:38:45 - Layer 15/32 - RETAINED - 19.27819 +---- +Optimizing Layer 16/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.40s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:39:36 - Layer 16/32 - CHANGED - 19.27819 > 19.14155 - 0.7% +---- +Optimizing Layer 17/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.53s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:40:28 - Layer 17/32 - CHANGED - 19.14155 > 18.89480 - 1.3% +---- +Optimizing Layer 18/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.31s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:41:19 - Layer 18/32 - 
RETAINED - 18.89480 +---- +Optimizing Layer 19/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.57s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:42:11 - Layer 19/32 - RETAINED - 18.89480 +---- +Optimizing Layer 20/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.45s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:43:03 - Layer 20/32 - RETAINED - 18.89480 +---- +Optimizing Layer 21/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.36s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:43:53 - Layer 21/32 - CHANGED - 18.89480 > 18.43143 - 2.5% +---- +Optimizing Layer 22/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.03s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:44:43 - Layer 22/32 - CHANGED - 18.43143 > 17.75345 - 3.7% +---- +Optimizing Layer 23/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.10s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:45:34 - Layer 23/32 - RETAINED - 17.75345 +---- +Optimizing Layer 24/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.32s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:46:24 - Layer 24/32 - CHANGED - 17.75345 > 17.46555 - 1.6% +---- +Optimizing Layer 25/32 (lerp): 100%|██████████████| 4/4 [00:35<00:00, 8.99s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:47:13 - Layer 25/32 - CHANGED - 17.46555 > 16.88957 - 3.3% +---- +Optimizing Layer 26/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.38s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:48:05 - Layer 26/32 - RETAINED - 16.88957 +---- +Optimizing Layer 27/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.58s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:48:56 - Layer 27/32 - RETAINED - 16.88957 +---- +Optimizing Layer 28/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.69s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:49:49 - Layer 28/32 - CHANGED - 16.88957 > 16.64291 - 1.5% +---- +Optimizing Layer 29/32 (lerp): 100%|██████████████| 4/4 
[00:37<00:00, 9.46s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:50:40 - Layer 29/32 - RETAINED - 16.64291 +---- +Optimizing Layer 30/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.44s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:51:32 - Layer 30/32 - CHANGED - 16.64291 > 16.07870 - 3.4% +---- +Optimizing Layer 31/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.61s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepBB']] +20:52:24 - Layer 31/32 - CHANGED - 16.07870 > 15.80575 - 1.7% +---- +Optimizing Layer 32/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.47s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:53:16 - Layer 32/32 - CHANGED - 15.80575 > 15.39211 - 2.6% +---- +Optimizing Header: 100%|██████████████████████████| 4/4 [00:37<00:00, 9.27s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.2, 'merge_stepBB']] +20:54:04 - Header - CHANGED - 15.39211 > 15.38669 - 0.0% + +----------------------------------------------------------------------------------------------------- +| Type | Phrase | Context | Raw Prob* | Used Prob** | Change | +----------------------------------------------------------------------------------------------------- +| BAD | anticipation | Her body quivers with | 0.77898% | 9.35% | -94.48% | +| BAD | anticipation | The atmosphere is thic.. | 5.36169% | 64.34% | -72.03% | +| BAD | unwavering | Filled with an | 0.00835% | 0.10% | -3.09% | +| BAD | determination | Her eyes were filled w.. | 0.00119% | 0.01% | -2.91% | +| BAD | determination | Her stubbornness only .. | 1.68289% | 20.19% | -61.48% | +| BAD | whisper | Her voice barely above.. | 97.71928% | 1172.63% | +18.20% | +| BAD | spine | shivers down her | 21.85458% | 262.25% | -815.31% | +| BAD | sends shivers | The thrill of the act | 0.00284% | 0.03% | +0.01% | +| BAD | ministrations | She moans and twitches.. 
| 0.69817% | 8.38% | +3.60% | +| BAD | legs | wraps her | 0.48370% | 5.80% | -27.91% | +| BAD | imposing figure | He had an | 0.00022% | 0.00% | -0.08% | +| BAD | shared challenges | Their bond strengthene.. | 0.00517% | 0.06% | -0.44% | +| BAD | bond | forged a | 1.57202% | 18.86% | +12.12% | +| BAD | bond | an unspoken | 0.14792% | 1.78% | -10.88% | +| BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | -0.00% | +| BAD | sense of vulnera.. | create a | 0.00001% | 0.00% | -0.00% | +| BAD | dimensions of in.. | explore new | 0.00087% | 0.01% | +0.01% | +| BAD | deepening our co.. | while | 0.00000% | 0.00% | -0.00% | +| BAD | shared experiences | through | 0.00003% | 0.00% | -0.01% | +| BAD | societal expecta.. | that transcend | 0.00001% | 0.00% | -0.02% | +| BAD | conventional bou.. | that defy | 0.00081% | 0.01% | -0.45% | +| BAD | conventional bou.. | and defy | 0.00317% | 0.04% | -0.09% | +| BAD | open communication | an environment | 0.00000% | 0.00% | -0.00% | +| BAD | emotional vulner.. | an environment | 0.00000% | 0.00% | -0.00% | +| BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | -0.00% | +| BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | -0.00% | +| BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | +0.00% | +| BAD | is truly arousing | way you explore my body | 0.00001% | 0.00% | +0.00% | +| BAD | challenge presen.. | my resolve unwavering .. | 0.00000% | 0.00% | -0.00% | +| BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | -0.00% | +| BAD | bond | cherishing the unique | 0.20155% | 2.42% | -22.38% | +| BAD | bond | special | 0.00369% | 0.04% | -0.16% | +| BAD | grows stronger w.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | that cannot be b.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | -0.00% | +| BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | +0.00% | +| GOOD | The apple is in .. | Question: If I'm in th.. 
| 26.86230% | 26.86% | +23.60% | +------------------------------------------------------------------------------------------------------ +| Totals | 157.39% | 1593.19% | -1054.16% | +------------------------------------------------------------------------------------------------------ +* = Unweighted, raw probability - ** = Probability after weight adjustments + +-------- MERGE COMPOSITION --------- +merge_stepBB: 0.56 +mistralai_Mistral-7B-v0.1: 0.36 +merge_stepAA: 0.07 + +------------------------------------ +20:54:14 - Loading model (../merge_stepCC)... +Loading checkpoint shards: 100%|██████████████████| 3/3 [00:05<00:00, 2.00s/it] +20:54:27 - Model loaded. Dtype: torch.float16 +------------------------------------ +Optimizing Layer 1/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.52s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.2, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:55:16 - Layer 1/32 - RETAINED - 15.39464 +---- +Optimizing Layer 2/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.01s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +20:55:59 - Layer 2/32 - CHANGED - 15.39464 > 15.35971 - 0.2% +---- +Optimizing Layer 3/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.24s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +20:56:44 - Layer 3/32 - CHANGED - 15.35971 > 15.30529 - 0.4% +---- +Optimizing Layer 4/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.06s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +20:57:28 - Layer 4/32 - CHANGED - 15.30529 > 15.25435 - 0.3% +---- +Optimizing Layer 5/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.54s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:58:15 - Layer 5/32 - RETAINED - 15.25435 +---- +Optimizing Layer 6/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.07s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 
'merge_stepAA'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +20:58:59 - Layer 6/32 - CHANGED - 15.25435 > 15.21839 - 0.2% +---- +Optimizing Layer 7/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.71s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +20:59:46 - Layer 7/32 - CHANGED - 15.21839 > 15.21246 - 0.0% +---- +Optimizing Layer 8/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.15s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepBB']] +21:00:30 - Layer 8/32 - RETAINED - 15.21246 +---- +Optimizing Layer 9/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.58s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +21:01:16 - Layer 9/32 - CHANGED - 15.21246 > 15.19112 - 0.1% +---- +Optimizing Layer 10/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.21s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +21:02:05 - Layer 10/32 - RETAINED - 15.19112 +---- +Optimizing Layer 11/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.34s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.4, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:02:55 - Layer 11/32 - CHANGED - 15.19112 > 15.12176 - 0.5% +---- +Optimizing Layer 12/32 (lerp): 100%|██████████████| 4/4 [00:35<00:00, 8.89s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +21:03:44 - Layer 12/32 - CHANGED - 15.12176 > 15.09187 - 0.2% +---- +Optimizing Layer 13/32 (lerp): 100%|██████████████| 4/4 [00:35<00:00, 8.84s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.4, 'merge_stepAA'], [0.8, 'merge_stepBB']] +21:04:33 - Layer 13/32 - RETAINED - 15.09187 +---- +Optimizing Layer 14/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.04s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:05:23 - Layer 14/32 - CHANGED - 15.09187 > 
15.08479 - 0.0% +---- +Optimizing Layer 15/32 (lerp): 100%|██████████████| 4/4 [00:39<00:00, 9.78s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.2, 'merge_stepCC']] +21:06:15 - Layer 15/32 - CHANGED - 15.08479 > 15.06165 - 0.2% +---- +Optimizing Layer 16/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.34s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +21:07:07 - Layer 16/32 - CHANGED - 15.06165 > 15.04180 - 0.1% +---- +Optimizing Layer 17/32 (lerp): 100%|██████████████| 4/4 [00:39<00:00, 9.75s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:08:00 - Layer 17/32 - CHANGED - 15.04180 > 15.03153 - 0.1% +---- +Optimizing Layer 18/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.38s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:08:50 - Layer 18/32 - RETAINED - 15.03153 +---- +Optimizing Layer 19/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.26s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:09:40 - Layer 19/32 - RETAINED - 15.03153 +---- +Optimizing Layer 20/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.17s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:10:31 - Layer 20/32 - RETAINED - 15.03153 +---- +Optimizing Layer 21/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.44s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:11:21 - Layer 21/32 - CHANGED - 15.03153 > 14.97763 - 0.4% +---- +Optimizing Layer 22/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.71s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:12:12 - Layer 22/32 - CHANGED - 14.97763 > 14.91363 - 0.4% +---- +Optimizing Layer 23/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.28s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:13:02 - Layer 23/32 - RETAINED - 14.91363 +---- +Optimizing Layer 24/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.41s/it] +[[1.0, 
'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.4, 'merge_stepCC']] +21:13:55 - Layer 24/32 - CHANGED - 14.91363 > 14.91031 - 0.0% +---- +Optimizing Layer 25/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.28s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.2, 'merge_stepCC']] +21:14:45 - Layer 25/32 - CHANGED - 14.91031 > 14.90836 - 0.0% +---- +Optimizing Layer 26/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.59s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:15:38 - Layer 26/32 - RETAINED - 14.90836 +---- +Optimizing Layer 27/32 (lerp): 100%|██████████████| 4/4 [00:39<00:00, 9.77s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:16:31 - Layer 27/32 - RETAINED - 14.90836 +---- +Optimizing Layer 28/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.37s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +21:17:21 - Layer 28/32 - RETAINED - 14.90836 +---- +Optimizing Layer 29/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.34s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:18:10 - Layer 29/32 - RETAINED - 14.90836 +---- +Optimizing Layer 30/32 (lerp): 100%|██████████████| 4/4 [00:41<00:00, 10.44s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +21:19:09 - Layer 30/32 - CHANGED - 14.90836 > 14.86424 - 0.3% +---- +Optimizing Layer 31/32 (lerp): 100%|██████████████| 4/4 [01:01<00:00, 15.28s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepBB']] +21:20:31 - Layer 31/32 - RETAINED - 14.86424 +---- +Optimizing Layer 32/32 (lerp): 100%|██████████████| 4/4 [01:09<00:00, 17.26s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:22:06 - Layer 32/32 - CHANGED - 14.86424 > 14.85794 - 0.0% +---- +Optimizing Header: 100%|██████████████████████████| 4/4 [00:50<00:00, 12.56s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.2, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:23:14 - Header - CHANGED - 14.85794 > 14.84594 - 0.1% + 
+----------------------------------------------------------------------------------------------------- +| Type | Phrase | Context | Raw Prob* | Used Prob** | Change | +----------------------------------------------------------------------------------------------------- +| BAD | anticipation | Her body quivers with | 0.96543% | 11.59% | -92.24% | +| BAD | anticipation | The atmosphere is thic.. | 5.13512% | 61.62% | -74.74% | +| BAD | unwavering | Filled with an | 0.00780% | 0.09% | -3.10% | +| BAD | determination | Her eyes were filled w.. | 0.00110% | 0.01% | -2.91% | +| BAD | determination | Her stubbornness only .. | 1.50464% | 18.06% | -63.61% | +| BAD | whisper | Her voice barely above.. | 97.83546% | 1174.03% | +19.60% | +| BAD | spine | shivers down her | 17.91211% | 214.95% | -862.62% | +| BAD | sends shivers | The thrill of the act | 0.00337% | 0.04% | +0.02% | +| BAD | ministrations | She moans and twitches.. | 0.55440% | 6.65% | +1.88% | +| BAD | legs | wraps her | 0.44506% | 5.34% | -28.38% | +| BAD | imposing figure | He had an | 0.00012% | 0.00% | -0.08% | +| BAD | shared challenges | Their bond strengthene.. | 0.00515% | 0.06% | -0.44% | +| BAD | bond | forged a | 1.23465% | 14.82% | +8.07% | +| BAD | bond | an unspoken | 0.11418% | 1.37% | -11.28% | +| BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | -0.00% | +| BAD | sense of vulnera.. | create a | 0.00001% | 0.00% | -0.00% | +| BAD | dimensions of in.. | explore new | 0.00083% | 0.01% | +0.01% | +| BAD | deepening our co.. | while | 0.00000% | 0.00% | -0.00% | +| BAD | shared experiences | through | 0.00003% | 0.00% | -0.01% | +| BAD | societal expecta.. | that transcend | 0.00000% | 0.00% | -0.02% | +| BAD | conventional bou.. | that defy | 0.00058% | 0.01% | -0.45% | +| BAD | conventional bou.. | and defy | 0.00256% | 0.03% | -0.09% | +| BAD | open communication | an environment | 0.00000% | 0.00% | -0.00% | +| BAD | emotional vulner.. 
| an environment | 0.00000% | 0.00% | -0.00% | +| BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | -0.00% | +| BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | -0.00% | +| BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | +0.00% | +| BAD | is truly arousing | way you explore my body | 0.00001% | 0.00% | +0.00% | +| BAD | challenge presen.. | my resolve unwavering .. | 0.00000% | 0.00% | -0.00% | +| BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | -0.00% | +| BAD | bond | cherishing the unique | 0.14126% | 1.70% | -23.11% | +| BAD | bond | special | 0.00333% | 0.04% | -0.17% | +| BAD | grows stronger w.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | that cannot be b.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | -0.00% | +| BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | +0.00% | +| GOOD | The apple is in .. | Question: If I'm in th.. | 24.61224% | 24.61% | +21.35% | +------------------------------------------------------------------------------------------------------ +| Totals | 150.48% | 1535.02% | -1112.33% | +------------------------------------------------------------------------------------------------------ +* = Unweighted, raw probability - ** = Probability after weight adjustments + +-------- MERGE COMPOSITION --------- +merge_stepCC: 0.38 +mistralai_Mistral-7B-v0.1: 0.29 +merge_stepBB: 0.29 +merge_stepAA: 0.04 + +21:23:28 - Saving model to ./toppy_new_remake2... +21:23:36 - Copying tokenizer files to ./toppy_new_remake2... +Skipped added_tokens.json (not found) +Copied tokenizer.model +Copied special_tokens_map.json +Copied tokenizer_config.json +Skipped vocab.json (not found) +Skipped merges.txt (not found) +21:23:36 - Model and tokenizer files saved successfully. 
\ No newline at end of file diff --git a/model-00001-of-00003.safetensors b/model-00001-of-00003.safetensors new file mode 100644 index 0000000..14e7dc6 --- /dev/null +++ b/model-00001-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6957d50ad86d411ad0771421642331d7bb212bf9bb91072e10bbd93005ab9bc4 +size 4943162240 diff --git a/model-00002-of-00003.safetensors b/model-00002-of-00003.safetensors new file mode 100644 index 0000000..293dcfd --- /dev/null +++ b/model-00002-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458eced1ecd73683331d42d70fca8ad55dcaf542d287207f81baa5b6fe35abe9 +size 4999819232 diff --git a/model-00003-of-00003.safetensors b/model-00003-of-00003.safetensors new file mode 100644 index 0000000..817cc81 --- /dev/null +++ b/model-00003-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c080aa94f4d1465cd10566984f8c48b1354730370c1c1ffa84ef79d3df51d1b +size 4540516256 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..b349bc0 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 14483464192 + }, + "weight_map": { + "lm_head.weight": "model-00003-of-00003.safetensors", + "model.embed_tokens.weight": "model-00001-of-00003.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + 
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", 
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.mlp.gate_proj.weight": 
"model-00002-of-00003.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + 
"model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.o_proj.weight": 
"model-00002-of-00003.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + 
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.24.mlp.down_proj.weight": 
"model-00003-of-00003.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + 
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.29.self_attn.k_proj.weight": 
"model-00003-of-00003.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.up_proj.weight": 
"model-00003-of-00003.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", + 
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + 
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.norm.weight": "model-00003-of-00003.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..a52c50a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,5 @@ +{ + "bos_token": "", + "eos_token": "", + "unk_token": "" +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..8b443ef --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..81730cd --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,42 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/toppy-lerp-merge-config.yml b/toppy-lerp-merge-config.yml new file mode 100644 index 0000000..ce628df --- /dev/null +++ b/toppy-lerp-merge-config.yml @@ -0,0 +1,142 @@ + +# Either "cpu" or "cuda" +# NOTE: Cuda requires enough VRAM to load 3 FP16 models (~45 GB for Mistral) +# NOTE 2: The (much slower) CPU mode still requires Cuda capability, but only enough VRAM to load a model once. (~15 GB for Mistral) +device: "cpu" +random_seed: 42 # Random seed to use + +directories: + model_path1: "../mistralai_Mistral-7B-v0.1" # Path to the base model. Must be a local copy. +# model_directory: "../merge_stepA/" # Directory of models to scan, IGNORED if models_to_merge has entries in it + output_directory: "./toppy_new_remake2" # Output directory of the merged model + +# A list of models to use as merge candidates - HF syntax, so can be either local directories or repos. +# Overrides model_directory if used +models_to_merge: ["../merge_stepAA", "../merge_stepBB", "../merge_stepCC"] + +# Merge ratios used for testing each layer's potential for improvement - Huge impact on total running time +merge_ratios: [0.2, 0.4, 0.6, 0.8] + +# Choose from the following methods. Defaults to "lerp". +# "lerp" - Linear interpolation +# "slerp" - Spherical linear interpolation +# "slice" - Highly experimental. The tensor weights shifts from one model to another. [Model 1 > 10% blend > Model 2] +# "cyclic" - Highly experimental. Ignores merge ratios as these are predefined. 
[Model 1 > 10% blend > 10% Model 2 > 10% blend > Model 1] +merge_method: "lerp" + +# If set to true, the lm_head and embed_token tensors (located outside the layers) will also be optimized +# Models that have a different vocab size from model1 will skip this phase automatically as it tends to cause model stability issues +merge_headers: true + +# Strategies: +# "cumulative" - Default strategy. If there's a chance of reducing the combined probability, accept the merge. +# "all_phrases" - Only accept the merge if all phrases show an improvement. (Warning: This rarely happens) +# "quantitive" - Ignores probabilities completely. Only looks at how many phrases show an improvement, as defined by the threshold below. +strategy: "cumulative" +# Threshold is currently only used by the "quantitive" strategy. If 0.6, at least 60% of the number of phrases must show an improvement. +strategy_threshold: 0.6 + +# Whether or not to automatically balance the weights so all phrases are of equal importance to the "cumulative" strategy. +# The weight value of phrases is ignored if set to true. 
+auto_weights: false + +# Phrase = What to measure, weight = multiplication factor, contexts = proceeding contexts +bad_phrases: + - phrase: "anticipation" + weight: 12 + contexts: ["Her body quivers with ", "The atmosphere is thick with "] + - phrase: "unwavering" + weight: 12 + contexts: ["Filled with an "] + - phrase: "determination" + weight: 12 + contexts: ["Her eyes were filled with ", "Her stubbornness only fuels my "] + - phrase: "whisper" + weight: 12 + contexts: ["Her voice barely above a "] + - phrase: "spine" + weight: 12 + contexts: ["shivers down her "] + - phrase: "sends shivers" + weight: 12 + contexts: ["The thrill of the act "] + - phrase: "ministrations" + weight: 12 + contexts: ["She moans and twitches at your "] + - phrase: "legs" + weight: 12 + contexts: ["wraps her "] + - phrase: "imposing figure" + weight: 12 + contexts: ["He had an "] + - phrase: "shared challenges" + weight: 12 + contexts: ["Their bond strengthened through "] + - phrase: "bond" + weight: 12 + contexts: ["forged a ", "an unspoken "] + - phrase: "enhance our experience" + weight: 12 + contexts: ["I'm excited to see how "] + - phrase: "sense of vulnerability" + weight: 12 + contexts: ["create a "] + - phrase: "dimensions of intimacy" + weight: 12 + contexts: ["explore new "] + - phrase: "deepening our connection" + weight: 12 + contexts: ["while "] + - phrase: "shared experiences" + weight: 12 + contexts: ["through "] + - phrase: "societal expectations" + weight: 12 + contexts: ["that transcend "] + - phrase: "conventional boundaries" + weight: 12 + contexts: ["that defy ", "and defy "] + - phrase: "open communication" + weight: 12 + contexts: ["an environment "] + - phrase: "emotional vulnerability" + weight: 12 + contexts: ["an environment "] + - phrase: "heightens our connection" + weight: 12 + contexts: ["touch and the anticipation "] + - phrase: "sensations you're creating" + weight: 12 + contexts: ["I'm enjoying "] + - phrase: "is truly arousing" + weight: 12 + 
contexts: ["attention to detail ", "way you explore my body "] + - phrase: "challenge presented" + weight: 12 + contexts: ["my resolve unwavering despite "] + - phrase: "humble vessel" + weight: 12 + contexts: ["surrendering to the exquisite torment "] + - phrase: "bond" + weight: 12 + contexts: ["cherishing the unique ", "special "] + - phrase: "grows stronger with each passing day" + weight: 12 + contexts: ["bond "] + - phrase: "that cannot be broken by time or circumstance" + weight: 12 + contexts: ["bond "] + - phrase: "becomes unbreakable, eternal" + weight: 12 + contexts: ["bond "] + - phrase: "grew stronger with each passing" + weight: 12 + contexts: ["bond "] + + +# Note - Example of a complex phrase +good_phrases: + - phrase: "The apple is in the bedroom" + weight: 1 + contexts: ["Question: If I'm in the living room and pick up the apple, go to the bedroom and drop the apple, then walk to the kitchen, where is the apple? Explain your reasoning. Answer: "] +