From d3d62fb59ca6265a2cf47834fd83e0c47a437b55 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 5 May 2026 19:51:37 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: TeeZee/Buttocks-7B-v1.1 Source: Original Platform --- .gitattributes | 35 ++ README.md | 147 +++++++ config.json | 26 ++ generation_config.json | 6 + mergemonster_toppy_lerp.txt | 651 +++++++++++++++++++++++++++++++ model-00001-of-00003.safetensors | 3 + model-00002-of-00003.safetensors | 3 + model-00003-of-00003.safetensors | 3 + model.safetensors.index.json | 298 ++++++++++++++ special_tokens_map.json | 5 + tokenizer.model | 3 + tokenizer_config.json | 42 ++ toppy-lerp-merge-config.yml | 142 +++++++ 13 files changed, 1364 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 mergemonster_toppy_lerp.txt create mode 100644 model-00001-of-00003.safetensors create mode 100644 model-00002-of-00003.safetensors create mode 100644 model-00003-of-00003.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json create mode 100644 toppy-lerp-merge-config.yml diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text 
+*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..6e97a61 --- /dev/null +++ b/README.md @@ -0,0 +1,147 @@ +--- +license: cc-by-nc-4.0 +tags: +- not-for-all-audiences +- merge +model-index: +- name: Buttocks-7B-v1.1 + results: + - task: + type: text-generation + name: Text Generation + dataset: + name: AI2 Reasoning Challenge (25-Shot) + type: ai2_arc + config: ARC-Challenge + split: test + args: + num_few_shot: 25 + metrics: + - type: acc_norm + value: 54.61 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: HellaSwag (10-Shot) + type: hellaswag + split: validation + 
args: + num_few_shot: 10 + metrics: + - type: acc_norm + value: 75.61 + name: normalized accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: MMLU (5-Shot) + type: cais/mmlu + config: all + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 50.22 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: TruthfulQA (0-shot) + type: truthful_qa + config: multiple_choice + split: validation + args: + num_few_shot: 0 + metrics: + - type: mc2 + value: 44.72 + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: Winogrande (5-shot) + type: winogrande + config: winogrande_xl + split: validation + args: + num_few_shot: 5 + metrics: + - type: acc + value: 68.9 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard + - task: + type: text-generation + name: Text Generation + dataset: + name: GSM8k (5-shot) + type: gsm8k + config: main + split: test + args: + num_few_shot: 5 + metrics: + - type: acc + value: 5.76 + name: accuracy + source: + url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=TeeZee/Buttocks-7B-v1.1 + name: Open LLM Leaderboard +--- + +### Buttocks 7B v1.1 ### + +An experiment that has gone very, very wrong. 
+ +### Model details ### + +- Recreation of the original recipe for [Undi95/Toppy-M-7B](https://huggingface.co/Undi95/Toppy-M-7B), but instead of final merge done by mergekit, [MergeMoster](https://github.com/Gryphe/MergeMonster/) was used with extended RPG preset. +- recipe in [mergekit-config](https://huggingface.co/TeeZee/Toppy-7B-remake-mergemonster-SLERP-v1.0/resolve/main/toppy-slerp-merge-config.yml), stepsAA, BB, CC are the original models with LORAS as per Toppy M 7B sauce. +- LERP merge method was used + +### Results ### + +- in simple terms this model is totally unhinged +- it always produces sequences similar to fever dreams or drug trips +- on a good day it can produce scenarios similar to old Monty Python sketches +- models shows incredible affinity to words like 'ass', 'buttocks', 'farts', prompting with those single words will probably + produce a whole story revolving around those topics. + +### Possible uses ### + +- to generate dream sequence in a story +- to make the boring model more unpredictable by merging at low weights with this monster +- to take a break, connect Silly Tavern to this model and get a few ROTFLs observing how every story deteriorates into pure craziness +- research on LLM hallucinations +# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_TeeZee__Buttocks-7B-v1.1) + +| Metric |Value| +|---------------------------------|----:| +|Avg. 
|49.97| +|AI2 Reasoning Challenge (25-Shot)|54.61| +|HellaSwag (10-Shot) |75.61| +|MMLU (5-Shot) |50.22| +|TruthfulQA (0-shot) |44.72| +|Winogrande (5-shot) |68.90| +|GSM8k (5-shot) | 5.76| + diff --git a/config.json b/config.json new file mode 100644 index 0000000..3b9e285 --- /dev/null +++ b/config.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "../mistralai_Mistral-7B-v0.1", + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mistral", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.36.2", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..c533f93 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.36.2" +} diff --git a/mergemonster_toppy_lerp.txt b/mergemonster_toppy_lerp.txt new file mode 100644 index 0000000..165e1d8 --- /dev/null +++ b/mergemonster_toppy_lerp.txt @@ -0,0 +1,651 @@ + +⠀⠀⠀⠀⠀⠀⣀⡀⠀⠀⣀⣤⣶⣾⣿⣿⣷⣶⣤⣀⠀⠀⣀⣀⠀⠀⠀⠀⠀⠀ +⠀⠀⠀⠀⠀⠜⠉⣿⡆⣼⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣧⢰⣿⠉⠃⠀⠀⠀⠀⠀ +⠀⢀⣤⣴⣦⣄⣴⠟⣸⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡎⢻⣦⣠⣴⣦⣄⠀⠀ +⠀⡞⠁⣠⣾⢿⣧⠀⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⣽⡿⣷⣄⠈⢷⠀ +⠀⣠⣾⠟⠁⢸⣿⠀⠘⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⡿⠁⠀⣿⡇⠈⠻⣷⣄⠀ +⣰⡿⠁⠀⢀⣾⣏⣾⣄⣰⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣇⣰⣷⣹⣷⠀⠀⠈⢿⣆ +⣿⡇⠀⢠⣾⠏⢸⣿⣿⣿⣿⠋⢻⣿⣿⣿⣿⡟⠙⣿⣿⣿⣿⡇⠹⣷⡀⠀⢸⣿ +⠹⣿⣴⡿⠋⠀⠈⠛⠉⣹⣿⣦⣄⡹⣿⣿⣋⣠⣶⣿⣏⠉⠛⠁⠀⠙⢿⣦⣿⠏ +⠀⣸⣿⠿⠿⣿⣾⣿⡿⠿⣿⣿⣿⣿⡆⢰⣿⣿⣿⣿⠿⢿⣿⣶⣿⠿⠿⣻⣇⠀ +⠀⣿⡇⢀⣴⣶⣤⣀⣴⣿⠿⣻⡿⣿⣧⣾⣿⢿⣟⠿⣿⣦⣀⣤⣶⣦⠀⢸⣿⠀ +⠀⢿⣧⠈⠃⢀⣵⣿⡋⠁⢀⣿⡷⣿⡇⢻⣿⣿⣿⡀⠈⢛⣿⣮⡀⠘⠀⣼⡟⠀ +⠀⠈⠻⣷⣤⣟⣋⣿⣧⣴⡿⠋⠀⣿⡇⢸⣿⠀⠙⢿⣦⣼⣿⣙⣻⣤⣾⠟⠁⠀ +⠀⠀⠀⠈⢽⣿⠛⢻⣏⢉⣤⣶⣶⣿⠁⠈⣿⣶⣶⣤⡉⣽⡟⠛⣿⡏⠁⠀⠀⠀ +⠀⠀⠀⠀⠈⠿⣷⣾⣾⣟⣉⣠⣿⢿⡇⢸⠿⣿⣄⣙⣻⣷⣷⣾⠿⠁⠀⠀⠀⠀ +⠀⠀⠀⠀⠀⠀⠀⠀⠙⠻⠿⠛⢁⡼⠃⠘⢦⡈⠛⠿⠟⠃⠀⠀⠀⠀⠀⠀⠀⠀ + +19:59:46 - THE MERGE MONSTER HUNGERS +------------------------------------ +Device : cpu 
+Random seed : 42 +Starting model : ../mistralai_Mistral-7B-v0.1 +Models to merge : ['../merge_stepAA', '../merge_stepBB', '../merge_stepCC'] +Output directory : ./toppy_new_remake2 +Phrases loaded : 31 +Auto weights : False +Merge ratios : [0.2, 0.4, 0.6, 0.8] +Merge method(s) : ['lerp'] +Merge headers : True +Strategy used : cumulative +------------------------------------ +19:59:46 - Loading model (../mistralai_Mistral-7B-v0.1)... +Loading checkpoint shards: 100%|██████████████████| 2/2 [00:06<00:00, 3.45s/it] +20:00:04 - Model loaded. Dtype: torch.float16 +------------------------------------ + +----------------------------------------------------------------------------------------------------- +| Type | Phrase | Context | Raw Prob* | Used Prob** | Change | +----------------------------------------------------------------------------------------------------- +| BAD | anticipation | Her body quivers with | 8.65211% | 103.83% | N/A | +| BAD | anticipation | The atmosphere is thic.. | 11.36381% | 136.37% | N/A | +| BAD | unwavering | Filled with an | 0.26573% | 3.19% | N/A | +| BAD | determination | Her eyes were filled w.. | 0.24377% | 2.93% | N/A | +| BAD | determination | Her stubbornness only .. | 6.80586% | 81.67% | N/A | +| BAD | whisper | Her voice barely above.. | 96.20242% | 1154.43% | N/A | +| BAD | spine | shivers down her | 89.79740% | 1077.57% | N/A | +| BAD | sends shivers | The thrill of the act | 0.00182% | 0.02% | N/A | +| BAD | ministrations | She moans and twitches.. | 0.39789% | 4.77% | N/A | +| BAD | legs | wraps her | 2.80972% | 33.72% | N/A | +| BAD | imposing figure | He had an | 0.00669% | 0.08% | N/A | +| BAD | shared challenges | Their bond strengthene.. | 0.04152% | 0.50% | N/A | +| BAD | bond | forged a | 0.56229% | 6.75% | N/A | +| BAD | bond | an unspoken | 1.05445% | 12.65% | N/A | +| BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | N/A | +| BAD | sense of vulnera.. 
| create a | 0.00002% | 0.00% | N/A | +| BAD | dimensions of in.. | explore new | 0.00038% | 0.00% | N/A | +| BAD | deepening our co.. | while | 0.00000% | 0.00% | N/A | +| BAD | shared experiences | through | 0.00059% | 0.01% | N/A | +| BAD | societal expecta.. | that transcend | 0.00161% | 0.02% | N/A | +| BAD | conventional bou.. | that defy | 0.03809% | 0.46% | N/A | +| BAD | conventional bou.. | and defy | 0.01043% | 0.13% | N/A | +| BAD | open communication | an environment | 0.00000% | 0.00% | N/A | +| BAD | emotional vulner.. | an environment | 0.00000% | 0.00% | N/A | +| BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | N/A | +| BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | N/A | +| BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | N/A | +| BAD | is truly arousing | way you explore my body | 0.00000% | 0.00% | N/A | +| BAD | challenge presen.. | my resolve unwavering .. | 0.00002% | 0.00% | N/A | +| BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | N/A | +| BAD | bond | cherishing the unique | 2.06671% | 24.80% | N/A | +| BAD | bond | special | 0.01728% | 0.21% | N/A | +| BAD | grows stronger w.. | bond | 0.00000% | 0.00% | N/A | +| BAD | that cannot be b.. | bond | 0.00000% | 0.00% | N/A | +| BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | N/A | +| BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | N/A | +| GOOD | The apple is in .. | Question: If I'm in th.. | 3.26070% | 3.26% | N/A | +------------------------------------------------------------------------------------------------------ +| Totals | 223.60% | 2647.35% | 0.00% | +------------------------------------------------------------------------------------------------------ +* = Unweighted, raw probability - ** = Probability after weight adjustments + +------------------------------------ +20:00:15 - Loading model (../merge_stepAA)... 
+Loading checkpoint shards: 100%|██████████████████| 3/3 [00:07<00:00, 2.33s/it] +20:00:30 - Model loaded. Dtype: torch.float16 +------------------------------------ +Optimizing Layer 1/32 (lerp): 100%|███████████████| 4/4 [00:42<00:00, 10.53s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.2, 'merge_stepAA']] +20:01:30 - Layer 1/32 - CHANGED - 26.40827 > 26.39262 - 0.1% +---- +Optimizing Layer 2/32 (lerp): 100%|███████████████| 4/4 [00:41<00:00, 10.47s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:02:29 - Layer 2/32 - CHANGED - 26.39262 > 26.32282 - 0.3% +---- +Optimizing Layer 3/32 (lerp): 100%|███████████████| 4/4 [00:45<00:00, 11.44s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:03:28 - Layer 3/32 - RETAINED - 26.32282 +---- +Optimizing Layer 4/32 (lerp): 100%|███████████████| 4/4 [00:42<00:00, 10.69s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:04:27 - Layer 4/32 - RETAINED - 26.32282 +---- +Optimizing Layer 5/32 (lerp): 100%|███████████████| 4/4 [00:46<00:00, 11.67s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:05:29 - Layer 5/32 - RETAINED - 26.32282 +---- +Optimizing Layer 6/32 (lerp): 100%|███████████████| 4/4 [00:40<00:00, 10.02s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:06:24 - Layer 6/32 - CHANGED - 26.32282 > 26.24790 - 0.3% +---- +Optimizing Layer 7/32 (lerp): 100%|███████████████| 4/4 [00:39<00:00, 9.81s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:07:18 - Layer 7/32 - RETAINED - 26.24790 +---- +Optimizing Layer 8/32 (lerp): 100%|███████████████| 4/4 [00:36<00:00, 9.03s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:08:07 - Layer 8/32 - RETAINED - 26.24790 +---- +Optimizing Layer 9/32 (lerp): 100%|███████████████| 4/4 [00:35<00:00, 8.87s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:08:54 - Layer 9/32 - CHANGED - 26.24790 > 26.19836 - 0.2% +---- +Optimizing Layer 10/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.70s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 
'merge_stepAA']] +20:09:41 - Layer 10/32 - CHANGED - 26.19836 > 26.09306 - 0.4% +---- +Optimizing Layer 11/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.22s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:10:25 - Layer 11/32 - CHANGED - 26.09306 > 26.01535 - 0.3% +---- +Optimizing Layer 12/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.23s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:11:09 - Layer 12/32 - CHANGED - 26.01535 > 25.91855 - 0.4% +---- +Optimizing Layer 13/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.14s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.4, 'merge_stepAA']] +20:11:53 - Layer 13/32 - CHANGED - 25.91855 > 25.91126 - 0.0% +---- +Optimizing Layer 14/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.00s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA']] +20:12:36 - Layer 14/32 - CHANGED - 25.91126 > 25.87849 - 0.1% +---- +Optimizing Layer 15/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.18s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:13:19 - Layer 15/32 - CHANGED - 25.87849 > 25.86635 - 0.0% +---- +Optimizing Layer 16/32 (lerp): 100%|██████████████| 4/4 [00:32<00:00, 8.07s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:14:03 - Layer 16/32 - RETAINED - 25.86635 +---- +Optimizing Layer 17/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.41s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA']] +20:14:47 - Layer 17/32 - CHANGED - 25.86635 > 25.86440 - 0.0% +---- +Optimizing Layer 18/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.34s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:15:32 - Layer 18/32 - RETAINED - 25.86440 +---- +Optimizing Layer 19/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.41s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:16:16 - Layer 19/32 - RETAINED - 25.86440 +---- +Optimizing Layer 20/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.45s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:17:01 - 
Layer 20/32 - RETAINED - 25.86440 +---- +Optimizing Layer 21/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.26s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:17:44 - Layer 21/32 - RETAINED - 25.86440 +---- +Optimizing Layer 22/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.38s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:18:29 - Layer 22/32 - RETAINED - 25.86440 +---- +Optimizing Layer 23/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.68s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:19:15 - Layer 23/32 - RETAINED - 25.86440 +---- +Optimizing Layer 24/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.54s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:20:00 - Layer 24/32 - RETAINED - 25.86440 +---- +Optimizing Layer 25/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.51s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:20:46 - Layer 25/32 - RETAINED - 25.86440 +---- +Optimizing Layer 26/32 (lerp): 100%|██████████████| 4/4 [00:35<00:00, 8.78s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:21:32 - Layer 26/32 - RETAINED - 25.86440 +---- +Optimizing Layer 27/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.73s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:22:18 - Layer 27/32 - RETAINED - 25.86440 +---- +Optimizing Layer 28/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.42s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:23:03 - Layer 28/32 - RETAINED - 25.86440 +---- +Optimizing Layer 29/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.72s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:23:50 - Layer 29/32 - RETAINED - 25.86440 +---- +Optimizing Layer 30/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.73s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:24:36 - Layer 30/32 - RETAINED - 25.86440 +---- +Optimizing Layer 31/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.56s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:25:21 - Layer 31/32 - RETAINED - 25.86440 +---- +Optimizing Layer 32/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 
8.34s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:26:07 - Layer 32/32 - RETAINED - 25.86440 +---- +Optimizing Header: 100%|██████████████████████████| 4/4 [00:36<00:00, 9.09s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:26:53 - Header - RETAINED - 25.86440 + +----------------------------------------------------------------------------------------------------- +| Type | Phrase | Context | Raw Prob* | Used Prob** | Change | +----------------------------------------------------------------------------------------------------- +| BAD | anticipation | Her body quivers with | 4.68658% | 56.24% | -47.59% | +| BAD | anticipation | The atmosphere is thic.. | 6.54790% | 78.57% | -57.79% | +| BAD | unwavering | Filled with an | 0.19636% | 2.36% | -0.83% | +| BAD | determination | Her eyes were filled w.. | 0.13339% | 1.60% | -1.32% | +| BAD | determination | Her stubbornness only .. | 5.84215% | 70.11% | -11.56% | +| BAD | whisper | Her voice barely above.. | 94.63391% | 1135.61% | -18.82% | +| BAD | spine | shivers down her | 88.40607% | 1060.87% | -16.70% | +| BAD | sends shivers | The thrill of the act | 0.00518% | 0.06% | +0.04% | +| BAD | ministrations | She moans and twitches.. | 3.33707% | 40.04% | +35.27% | +| BAD | legs | wraps her | 6.97311% | 83.68% | +49.96% | +| BAD | imposing figure | He had an | 0.02539% | 0.30% | +0.22% | +| BAD | shared challenges | Their bond strengthene.. | 0.01479% | 0.18% | -0.32% | +| BAD | bond | forged a | 0.52765% | 6.33% | -0.42% | +| BAD | bond | an unspoken | 1.95329% | 23.44% | +10.79% | +| BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | -0.00% | +| BAD | sense of vulnera.. | create a | 0.00005% | 0.00% | +0.00% | +| BAD | dimensions of in.. | explore new | 0.00306% | 0.04% | +0.03% | +| BAD | deepening our co.. | while | 0.00000% | 0.00% | -0.00% | +| BAD | shared experiences | through | 0.00026% | 0.00% | -0.00% | +| BAD | societal expecta.. 
| that transcend | 0.00175% | 0.02% | +0.00% | +| BAD | conventional bou.. | that defy | 0.01955% | 0.23% | -0.22% | +| BAD | conventional bou.. | and defy | 0.00254% | 0.03% | -0.09% | +| BAD | open communication | an environment | 0.00000% | 0.00% | +0.00% | +| BAD | emotional vulner.. | an environment | 0.00000% | 0.00% | +0.00% | +| BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | +0.00% | +| BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | +0.00% | +| BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | +0.00% | +| BAD | is truly arousing | way you explore my body | 0.00001% | 0.00% | +0.00% | +| BAD | challenge presen.. | my resolve unwavering .. | 0.00001% | 0.00% | -0.00% | +| BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | -0.00% | +| BAD | bond | cherishing the unique | 3.14311% | 37.72% | +12.92% | +| BAD | bond | special | 0.03303% | 0.40% | +0.19% | +| BAD | grows stronger w.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | that cannot be b.. | bond | 0.00000% | 0.00% | -0.00% | +| BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | +0.00% | +| GOOD | The apple is in .. | Question: If I'm in th.. | 11.39444% | 11.39% | +8.13% | +------------------------------------------------------------------------------------------------------ +| Totals | 227.88% | 2609.23% | -38.12% | +------------------------------------------------------------------------------------------------------ +* = Unweighted, raw probability - ** = Probability after weight adjustments + +-------- MERGE COMPOSITION --------- +mistralai_Mistral-7B-v0.1: 0.78 +merge_stepAA: 0.22 + +------------------------------------ +20:27:02 - Loading model (../merge_stepBB)... +Loading checkpoint shards: 100%|██████████████████| 3/3 [00:06<00:00, 2.16s/it] +20:27:15 - Model loaded. 
Dtype: torch.float16 +------------------------------------ +Optimizing Layer 1/32 (lerp): 100%|███████████████| 4/4 [00:33<00:00, 8.47s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.2, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:28:01 - Layer 1/32 - CHANGED - 25.86440 > 24.33593 - 5.9% +---- +Optimizing Layer 2/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.15s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:28:46 - Layer 2/32 - CHANGED - 24.33593 > 23.85608 - 2.0% +---- +Optimizing Layer 3/32 (lerp): 100%|███████████████| 4/4 [00:31<00:00, 7.85s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:29:29 - Layer 3/32 - CHANGED - 23.85608 > 23.50431 - 1.5% +---- +Optimizing Layer 4/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.16s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:30:13 - Layer 4/32 - CHANGED - 23.50431 > 23.27042 - 1.0% +---- +Optimizing Layer 5/32 (lerp): 100%|███████████████| 4/4 [00:31<00:00, 7.91s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:30:56 - Layer 5/32 - CHANGED - 23.27042 > 22.72376 - 2.3% +---- +Optimizing Layer 6/32 (lerp): 100%|███████████████| 4/4 [00:33<00:00, 8.33s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:31:41 - Layer 6/32 - CHANGED - 22.72376 > 22.61975 - 0.5% +---- +Optimizing Layer 7/32 (lerp): 100%|███████████████| 4/4 [00:31<00:00, 7.82s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:32:25 - Layer 7/32 - CHANGED - 22.61975 > 22.13508 - 2.1% +---- +Optimizing Layer 8/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.62s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepBB']] +20:33:12 - Layer 8/32 - CHANGED - 22.13508 > 21.57464 - 2.5% +---- +Optimizing Layer 9/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.68s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:33:59 - Layer 9/32 - 
CHANGED - 21.57464 > 21.32946 - 1.1% +---- +Optimizing Layer 10/32 (lerp): 100%|██████████████| 4/4 [00:33<00:00, 8.47s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:34:46 - Layer 10/32 - CHANGED - 21.32946 > 20.82514 - 2.4% +---- +Optimizing Layer 11/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.01s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.4, 'merge_stepBB']] +20:35:34 - Layer 11/32 - CHANGED - 20.82514 > 20.71148 - 0.5% +---- +Optimizing Layer 12/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.55s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:36:20 - Layer 12/32 - CHANGED - 20.71148 > 20.23820 - 2.3% +---- +Optimizing Layer 13/32 (lerp): 100%|██████████████| 4/4 [00:34<00:00, 8.62s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.4, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:37:06 - Layer 13/32 - CHANGED - 20.23820 > 19.70820 - 2.6% +---- +Optimizing Layer 14/32 (lerp): 100%|██████████████| 4/4 [00:35<00:00, 8.97s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:37:55 - Layer 14/32 - CHANGED - 19.70820 > 19.27819 - 2.2% +---- +Optimizing Layer 15/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.11s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA']] +20:38:45 - Layer 15/32 - RETAINED - 19.27819 +---- +Optimizing Layer 16/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.40s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:39:36 - Layer 16/32 - CHANGED - 19.27819 > 19.14155 - 0.7% +---- +Optimizing Layer 17/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.53s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:40:28 - Layer 17/32 - CHANGED - 19.14155 > 18.89480 - 1.3% +---- +Optimizing Layer 18/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.31s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:41:19 - Layer 18/32 - 
RETAINED - 18.89480 +---- +Optimizing Layer 19/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.57s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:42:11 - Layer 19/32 - RETAINED - 18.89480 +---- +Optimizing Layer 20/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.45s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:43:03 - Layer 20/32 - RETAINED - 18.89480 +---- +Optimizing Layer 21/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.36s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:43:53 - Layer 21/32 - CHANGED - 18.89480 > 18.43143 - 2.5% +---- +Optimizing Layer 22/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.03s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:44:43 - Layer 22/32 - CHANGED - 18.43143 > 17.75345 - 3.7% +---- +Optimizing Layer 23/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.10s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:45:34 - Layer 23/32 - RETAINED - 17.75345 +---- +Optimizing Layer 24/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.32s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:46:24 - Layer 24/32 - CHANGED - 17.75345 > 17.46555 - 1.6% +---- +Optimizing Layer 25/32 (lerp): 100%|██████████████| 4/4 [00:35<00:00, 8.99s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:47:13 - Layer 25/32 - CHANGED - 17.46555 > 16.88957 - 3.3% +---- +Optimizing Layer 26/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.38s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:48:05 - Layer 26/32 - RETAINED - 16.88957 +---- +Optimizing Layer 27/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.58s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:48:56 - Layer 27/32 - RETAINED - 16.88957 +---- +Optimizing Layer 28/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.69s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:49:49 - Layer 28/32 - CHANGED - 16.88957 > 16.64291 - 1.5% +---- +Optimizing Layer 29/32 (lerp): 100%|██████████████| 4/4 
[00:37<00:00, 9.46s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +20:50:40 - Layer 29/32 - RETAINED - 16.64291 +---- +Optimizing Layer 30/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.44s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:51:32 - Layer 30/32 - CHANGED - 16.64291 > 16.07870 - 3.4% +---- +Optimizing Layer 31/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.61s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepBB']] +20:52:24 - Layer 31/32 - CHANGED - 16.07870 > 15.80575 - 1.7% +---- +Optimizing Layer 32/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.47s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:53:16 - Layer 32/32 - CHANGED - 15.80575 > 15.39211 - 2.6% +---- +Optimizing Header: 100%|██████████████████████████| 4/4 [00:37<00:00, 9.27s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.2, 'merge_stepBB']] +20:54:04 - Header - CHANGED - 15.39211 > 15.38669 - 0.0% + +----------------------------------------------------------------------------------------------------- +| Type | Phrase | Context | Raw Prob* | Used Prob** | Change | +----------------------------------------------------------------------------------------------------- +| BAD | anticipation | Her body quivers with | 0.77898% | 9.35% | -94.48% | +| BAD | anticipation | The atmosphere is thic.. | 5.36169% | 64.34% | -72.03% | +| BAD | unwavering | Filled with an | 0.00835% | 0.10% | -3.09% | +| BAD | determination | Her eyes were filled w.. | 0.00119% | 0.01% | -2.91% | +| BAD | determination | Her stubbornness only .. | 1.68289% | 20.19% | -61.48% | +| BAD | whisper | Her voice barely above.. | 97.71928% | 1172.63% | +18.20% | +| BAD | spine | shivers down her | 21.85458% | 262.25% | -815.31% | +| BAD | sends shivers | The thrill of the act | 0.00284% | 0.03% | +0.01% | +| BAD | ministrations | She moans and twitches.. 
| 0.69817% | 8.38% | +3.60% | +| BAD | legs | wraps her | 0.48370% | 5.80% | -27.91% | +| BAD | imposing figure | He had an | 0.00022% | 0.00% | -0.08% | +| BAD | shared challenges | Their bond strengthene.. | 0.00517% | 0.06% | -0.44% | +| BAD | bond | forged a | 1.57202% | 18.86% | +12.12% | +| BAD | bond | an unspoken | 0.14792% | 1.78% | -10.88% | +| BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | -0.00% | +| BAD | sense of vulnera.. | create a | 0.00001% | 0.00% | -0.00% | +| BAD | dimensions of in.. | explore new | 0.00087% | 0.01% | +0.01% | +| BAD | deepening our co.. | while | 0.00000% | 0.00% | -0.00% | +| BAD | shared experiences | through | 0.00003% | 0.00% | -0.01% | +| BAD | societal expecta.. | that transcend | 0.00001% | 0.00% | -0.02% | +| BAD | conventional bou.. | that defy | 0.00081% | 0.01% | -0.45% | +| BAD | conventional bou.. | and defy | 0.00317% | 0.04% | -0.09% | +| BAD | open communication | an environment | 0.00000% | 0.00% | -0.00% | +| BAD | emotional vulner.. | an environment | 0.00000% | 0.00% | -0.00% | +| BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | -0.00% | +| BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | -0.00% | +| BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | +0.00% | +| BAD | is truly arousing | way you explore my body | 0.00001% | 0.00% | +0.00% | +| BAD | challenge presen.. | my resolve unwavering .. | 0.00000% | 0.00% | -0.00% | +| BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | -0.00% | +| BAD | bond | cherishing the unique | 0.20155% | 2.42% | -22.38% | +| BAD | bond | special | 0.00369% | 0.04% | -0.16% | +| BAD | grows stronger w.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | that cannot be b.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | -0.00% | +| BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | +0.00% | +| GOOD | The apple is in .. | Question: If I'm in th.. 
| 26.86230% | 26.86% | +23.60% | +------------------------------------------------------------------------------------------------------ +| Totals | 157.39% | 1593.19% | -1054.16% | +------------------------------------------------------------------------------------------------------ +* = Unweighted, raw probability - ** = Probability after weight adjustments + +-------- MERGE COMPOSITION --------- +merge_stepBB: 0.56 +mistralai_Mistral-7B-v0.1: 0.36 +merge_stepAA: 0.07 + +------------------------------------ +20:54:14 - Loading model (../merge_stepCC)... +Loading checkpoint shards: 100%|██████████████████| 3/3 [00:05<00:00, 2.00s/it] +20:54:27 - Model loaded. Dtype: torch.float16 +------------------------------------ +Optimizing Layer 1/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.52s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.2, 'merge_stepAA'], [0.8, 'merge_stepBB']] +20:55:16 - Layer 1/32 - RETAINED - 15.39464 +---- +Optimizing Layer 2/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.01s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +20:55:59 - Layer 2/32 - CHANGED - 15.39464 > 15.35971 - 0.2% +---- +Optimizing Layer 3/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.24s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +20:56:44 - Layer 3/32 - CHANGED - 15.35971 > 15.30529 - 0.4% +---- +Optimizing Layer 4/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.06s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +20:57:28 - Layer 4/32 - CHANGED - 15.30529 > 15.25435 - 0.3% +---- +Optimizing Layer 5/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.54s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +20:58:15 - Layer 5/32 - RETAINED - 15.25435 +---- +Optimizing Layer 6/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.07s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 
'merge_stepAA'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +20:58:59 - Layer 6/32 - CHANGED - 15.25435 > 15.21839 - 0.2% +---- +Optimizing Layer 7/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.71s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +20:59:46 - Layer 7/32 - CHANGED - 15.21839 > 15.21246 - 0.0% +---- +Optimizing Layer 8/32 (lerp): 100%|███████████████| 4/4 [00:32<00:00, 8.15s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepBB']] +21:00:30 - Layer 8/32 - RETAINED - 15.21246 +---- +Optimizing Layer 9/32 (lerp): 100%|███████████████| 4/4 [00:34<00:00, 8.58s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +21:01:16 - Layer 9/32 - CHANGED - 15.21246 > 15.19112 - 0.1% +---- +Optimizing Layer 10/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.21s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB']] +21:02:05 - Layer 10/32 - RETAINED - 15.19112 +---- +Optimizing Layer 11/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.34s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.4, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:02:55 - Layer 11/32 - CHANGED - 15.19112 > 15.12176 - 0.5% +---- +Optimizing Layer 12/32 (lerp): 100%|██████████████| 4/4 [00:35<00:00, 8.89s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +21:03:44 - Layer 12/32 - CHANGED - 15.12176 > 15.09187 - 0.2% +---- +Optimizing Layer 13/32 (lerp): 100%|██████████████| 4/4 [00:35<00:00, 8.84s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.4, 'merge_stepAA'], [0.8, 'merge_stepBB']] +21:04:33 - Layer 13/32 - RETAINED - 15.09187 +---- +Optimizing Layer 14/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.04s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:05:23 - Layer 14/32 - CHANGED - 15.09187 > 
15.08479 - 0.0% +---- +Optimizing Layer 15/32 (lerp): 100%|██████████████| 4/4 [00:39<00:00, 9.78s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepAA'], [0.2, 'merge_stepCC']] +21:06:15 - Layer 15/32 - CHANGED - 15.08479 > 15.06165 - 0.2% +---- +Optimizing Layer 16/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.34s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +21:07:07 - Layer 16/32 - CHANGED - 15.06165 > 15.04180 - 0.1% +---- +Optimizing Layer 17/32 (lerp): 100%|██████████████| 4/4 [00:39<00:00, 9.75s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepAA'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:08:00 - Layer 17/32 - CHANGED - 15.04180 > 15.03153 - 0.1% +---- +Optimizing Layer 18/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.38s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:08:50 - Layer 18/32 - RETAINED - 15.03153 +---- +Optimizing Layer 19/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.26s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:09:40 - Layer 19/32 - RETAINED - 15.03153 +---- +Optimizing Layer 20/32 (lerp): 100%|██████████████| 4/4 [00:36<00:00, 9.17s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:10:31 - Layer 20/32 - RETAINED - 15.03153 +---- +Optimizing Layer 21/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.44s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:11:21 - Layer 21/32 - CHANGED - 15.03153 > 14.97763 - 0.4% +---- +Optimizing Layer 22/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.71s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:12:12 - Layer 22/32 - CHANGED - 14.97763 > 14.91363 - 0.4% +---- +Optimizing Layer 23/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.28s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:13:02 - Layer 23/32 - RETAINED - 14.91363 +---- +Optimizing Layer 24/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.41s/it] +[[1.0, 
'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.4, 'merge_stepCC']] +21:13:55 - Layer 24/32 - CHANGED - 14.91363 > 14.91031 - 0.0% +---- +Optimizing Layer 25/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.28s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.2, 'merge_stepCC']] +21:14:45 - Layer 25/32 - CHANGED - 14.91031 > 14.90836 - 0.0% +---- +Optimizing Layer 26/32 (lerp): 100%|██████████████| 4/4 [00:38<00:00, 9.59s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:15:38 - Layer 26/32 - RETAINED - 14.90836 +---- +Optimizing Layer 27/32 (lerp): 100%|██████████████| 4/4 [00:39<00:00, 9.77s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:16:31 - Layer 27/32 - RETAINED - 14.90836 +---- +Optimizing Layer 28/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.37s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB']] +21:17:21 - Layer 28/32 - RETAINED - 14.90836 +---- +Optimizing Layer 29/32 (lerp): 100%|██████████████| 4/4 [00:37<00:00, 9.34s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1']] +21:18:10 - Layer 29/32 - RETAINED - 14.90836 +---- +Optimizing Layer 30/32 (lerp): 100%|██████████████| 4/4 [00:41<00:00, 10.44s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.6, 'merge_stepCC']] +21:19:09 - Layer 30/32 - CHANGED - 14.90836 > 14.86424 - 0.3% +---- +Optimizing Layer 31/32 (lerp): 100%|██████████████| 4/4 [01:01<00:00, 15.28s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.6, 'merge_stepBB']] +21:20:31 - Layer 31/32 - RETAINED - 14.86424 +---- +Optimizing Layer 32/32 (lerp): 100%|██████████████| 4/4 [01:09<00:00, 17.26s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.8, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:22:06 - Layer 32/32 - CHANGED - 14.86424 > 14.85794 - 0.0% +---- +Optimizing Header: 100%|██████████████████████████| 4/4 [00:50<00:00, 12.56s/it] +[[1.0, 'mistralai_Mistral-7B-v0.1'], [0.2, 'merge_stepBB'], [0.8, 'merge_stepCC']] +21:23:14 - Header - CHANGED - 14.85794 > 14.84594 - 0.1% + 
+----------------------------------------------------------------------------------------------------- +| Type | Phrase | Context | Raw Prob* | Used Prob** | Change | +----------------------------------------------------------------------------------------------------- +| BAD | anticipation | Her body quivers with | 0.96543% | 11.59% | -92.24% | +| BAD | anticipation | The atmosphere is thic.. | 5.13512% | 61.62% | -74.74% | +| BAD | unwavering | Filled with an | 0.00780% | 0.09% | -3.10% | +| BAD | determination | Her eyes were filled w.. | 0.00110% | 0.01% | -2.91% | +| BAD | determination | Her stubbornness only .. | 1.50464% | 18.06% | -63.61% | +| BAD | whisper | Her voice barely above.. | 97.83546% | 1174.03% | +19.60% | +| BAD | spine | shivers down her | 17.91211% | 214.95% | -862.62% | +| BAD | sends shivers | The thrill of the act | 0.00337% | 0.04% | +0.02% | +| BAD | ministrations | She moans and twitches.. | 0.55440% | 6.65% | +1.88% | +| BAD | legs | wraps her | 0.44506% | 5.34% | -28.38% | +| BAD | imposing figure | He had an | 0.00012% | 0.00% | -0.08% | +| BAD | shared challenges | Their bond strengthene.. | 0.00515% | 0.06% | -0.44% | +| BAD | bond | forged a | 1.23465% | 14.82% | +8.07% | +| BAD | bond | an unspoken | 0.11418% | 1.37% | -11.28% | +| BAD | enhance our expe.. | I'm excited to see how | 0.00000% | 0.00% | -0.00% | +| BAD | sense of vulnera.. | create a | 0.00001% | 0.00% | -0.00% | +| BAD | dimensions of in.. | explore new | 0.00083% | 0.01% | +0.01% | +| BAD | deepening our co.. | while | 0.00000% | 0.00% | -0.00% | +| BAD | shared experiences | through | 0.00003% | 0.00% | -0.01% | +| BAD | societal expecta.. | that transcend | 0.00000% | 0.00% | -0.02% | +| BAD | conventional bou.. | that defy | 0.00058% | 0.01% | -0.45% | +| BAD | conventional bou.. | and defy | 0.00256% | 0.03% | -0.09% | +| BAD | open communication | an environment | 0.00000% | 0.00% | -0.00% | +| BAD | emotional vulner.. 
| an environment | 0.00000% | 0.00% | -0.00% | +| BAD | heightens our co.. | touch and the anticipa.. | 0.00000% | 0.00% | -0.00% | +| BAD | sensations you'r.. | I'm enjoying | 0.00000% | 0.00% | -0.00% | +| BAD | is truly arousing | attention to detail | 0.00000% | 0.00% | +0.00% | +| BAD | is truly arousing | way you explore my body | 0.00001% | 0.00% | +0.00% | +| BAD | challenge presen.. | my resolve unwavering .. | 0.00000% | 0.00% | -0.00% | +| BAD | humble vessel | surrendering to the ex.. | 0.00000% | 0.00% | -0.00% | +| BAD | bond | cherishing the unique | 0.14126% | 1.70% | -23.11% | +| BAD | bond | special | 0.00333% | 0.04% | -0.17% | +| BAD | grows stronger w.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | that cannot be b.. | bond | 0.00000% | 0.00% | +0.00% | +| BAD | becomes unbreaka.. | bond | 0.00000% | 0.00% | -0.00% | +| BAD | grew stronger wi.. | bond | 0.00000% | 0.00% | +0.00% | +| GOOD | The apple is in .. | Question: If I'm in th.. | 24.61224% | 24.61% | +21.35% | +------------------------------------------------------------------------------------------------------ +| Totals | 150.48% | 1535.02% | -1112.33% | +------------------------------------------------------------------------------------------------------ +* = Unweighted, raw probability - ** = Probability after weight adjustments + +-------- MERGE COMPOSITION --------- +merge_stepCC: 0.38 +mistralai_Mistral-7B-v0.1: 0.29 +merge_stepBB: 0.29 +merge_stepAA: 0.04 + +21:23:28 - Saving model to ./toppy_new_remake2... +21:23:36 - Copying tokenizer files to ./toppy_new_remake2... +Skipped added_tokens.json (not found) +Copied tokenizer.model +Copied special_tokens_map.json +Copied tokenizer_config.json +Skipped vocab.json (not found) +Skipped merges.txt (not found) +21:23:36 - Model and tokenizer files saved successfully. 
\ No newline at end of file diff --git a/model-00001-of-00003.safetensors b/model-00001-of-00003.safetensors new file mode 100644 index 0000000..14e7dc6 --- /dev/null +++ b/model-00001-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6957d50ad86d411ad0771421642331d7bb212bf9bb91072e10bbd93005ab9bc4 +size 4943162240 diff --git a/model-00002-of-00003.safetensors b/model-00002-of-00003.safetensors new file mode 100644 index 0000000..293dcfd --- /dev/null +++ b/model-00002-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458eced1ecd73683331d42d70fca8ad55dcaf542d287207f81baa5b6fe35abe9 +size 4999819232 diff --git a/model-00003-of-00003.safetensors b/model-00003-of-00003.safetensors new file mode 100644 index 0000000..817cc81 --- /dev/null +++ b/model-00003-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c080aa94f4d1465cd10566984f8c48b1354730370c1c1ffa84ef79d3df51d1b +size 4540516256 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..b349bc0 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 14483464192 + }, + "weight_map": { + "lm_head.weight": "model-00003-of-00003.safetensors", + "model.embed_tokens.weight": "model-00001-of-00003.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + 
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", 
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.mlp.gate_proj.weight": 
"model-00002-of-00003.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + 
"model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.o_proj.weight": 
"model-00002-of-00003.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + 
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.24.mlp.down_proj.weight": 
"model-00003-of-00003.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + 
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.29.self_attn.k_proj.weight": 
"model-00003-of-00003.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.up_proj.weight": 
"model-00003-of-00003.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", + 
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + 
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.norm.weight": "model-00003-of-00003.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..a52c50a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,5 @@ +{ + "bos_token": "", + "eos_token": "", + "unk_token": "" +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..8b443ef --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..81730cd --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,42 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/toppy-lerp-merge-config.yml b/toppy-lerp-merge-config.yml new file mode 100644 index 0000000..ce628df --- /dev/null +++ b/toppy-lerp-merge-config.yml @@ -0,0 +1,142 @@ + +# Either "cpu" or "cuda" +# NOTE: Cuda requires enough VRAM to load 3 FP16 models (~45 GB for Mistral) +# NOTE 2: The (much slower) CPU mode still requires Cuda capability, but only enough VRAM to load a model once. (~15 GB for Mistral) +device: "cpu" +random_seed: 42 # Random seed to use + +directories: + model_path1: "../mistralai_Mistral-7B-v0.1" # Path to the base model. Must be a local copy. +# model_directory: "../merge_stepA/" # Directory of models to scan, IGNORED if models_to_merge has entries in it + output_directory: "./toppy_new_remake2" # Output directory of the merged model + +# A list of models to use as merge candidates - HF syntax, so can be either local directories or repos. +# Overrides model_directory if used +models_to_merge: ["../merge_stepAA", "../merge_stepBB", "../merge_stepCC"] + +# Merge ratios used for testing each layer's potential for improvement - Huge impact on total running time +merge_ratios: [0.2, 0.4, 0.6, 0.8] + +# Choose from the following methods. Defaults to "lerp". +# "lerp" - Linear interpolation +# "slerp" - Spherical linear interpolation +# "slice" - Highly experimental. The tensor weights shifts from one model to another. [Model 1 > 10% blend > Model 2] +# "cyclic" - Highly experimental. Ignores merge ratios as these are predefined. 
[Model 1 > 10% blend > 10% Model 2 > 10% blend > Model 1] +merge_method: "lerp" + +# If set to true, the lm_head and embed_token tensors (located outside the layers) will also be optimized +# Models that have a different vocab size from model1 will skip this phase automatically as it tends to cause model stability issues +merge_headers: true + +# Strategies: +# "cumulative" - Default strategy. If there's a chance of reducing the combined probability, accept the merge. +# "all_phrases" - Only accept the merge if all phrases show an improvement. (Warning: This rarely happens) +# "quantitive" - Ignores probabilities completely. Only looks at how many phrases show an improvement, as defined by the threshold below. +strategy: "cumulative" +# Threshold is currently only used by the "quantitive" strategy. If 0.6, at least 60% of the number of phrases must show an improvement. +strategy_threshold: 0.6 + +# Whether or not to automatically balance the weights so all phrases are of equal importance to the "cumulative" strategy. +# The weight value of phrases is ignored if set to true. 
+auto_weights: false + +# Phrase = What to measure, weight = multiplication factor, contexts = proceeding contexts +bad_phrases: + - phrase: "anticipation" + weight: 12 + contexts: ["Her body quivers with ", "The atmosphere is thick with "] + - phrase: "unwavering" + weight: 12 + contexts: ["Filled with an "] + - phrase: "determination" + weight: 12 + contexts: ["Her eyes were filled with ", "Her stubbornness only fuels my "] + - phrase: "whisper" + weight: 12 + contexts: ["Her voice barely above a "] + - phrase: "spine" + weight: 12 + contexts: ["shivers down her "] + - phrase: "sends shivers" + weight: 12 + contexts: ["The thrill of the act "] + - phrase: "ministrations" + weight: 12 + contexts: ["She moans and twitches at your "] + - phrase: "legs" + weight: 12 + contexts: ["wraps her "] + - phrase: "imposing figure" + weight: 12 + contexts: ["He had an "] + - phrase: "shared challenges" + weight: 12 + contexts: ["Their bond strengthened through "] + - phrase: "bond" + weight: 12 + contexts: ["forged a ", "an unspoken "] + - phrase: "enhance our experience" + weight: 12 + contexts: ["I'm excited to see how "] + - phrase: "sense of vulnerability" + weight: 12 + contexts: ["create a "] + - phrase: "dimensions of intimacy" + weight: 12 + contexts: ["explore new "] + - phrase: "deepening our connection" + weight: 12 + contexts: ["while "] + - phrase: "shared experiences" + weight: 12 + contexts: ["through "] + - phrase: "societal expectations" + weight: 12 + contexts: ["that transcend "] + - phrase: "conventional boundaries" + weight: 12 + contexts: ["that defy ", "and defy "] + - phrase: "open communication" + weight: 12 + contexts: ["an environment "] + - phrase: "emotional vulnerability" + weight: 12 + contexts: ["an environment "] + - phrase: "heightens our connection" + weight: 12 + contexts: ["touch and the anticipation "] + - phrase: "sensations you're creating" + weight: 12 + contexts: ["I'm enjoying "] + - phrase: "is truly arousing" + weight: 12 + 
contexts: ["attention to detail ", "way you explore my body "] + - phrase: "challenge presented" + weight: 12 + contexts: ["my resolve unwavering despite "] + - phrase: "humble vessel" + weight: 12 + contexts: ["surrendering to the exquisite torment "] + - phrase: "bond" + weight: 12 + contexts: ["cherishing the unique ", "special "] + - phrase: "grows stronger with each passing day" + weight: 12 + contexts: ["bond "] + - phrase: "that cannot be broken by time or circumstance" + weight: 12 + contexts: ["bond "] + - phrase: "becomes unbreakable, eternal" + weight: 12 + contexts: ["bond "] + - phrase: "grew stronger with each passing" + weight: 12 + contexts: ["bond "] + + +# Note - Example of a complex phrase +good_phrases: + - phrase: "The apple is in the bedroom" + weight: 1 + contexts: ["Question: If I'm in the living room and pick up the apple, go to the bedroom and drop the apple, then walk to the kitchen, where is the apple? Explain your reasoning. Answer: "] +