初始化项目,由ModelHub XC社区提供模型

Model: martyn/mixtral-megamerge-dare-8x7b-v2
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-04-12 13:33:08 +08:00
commit 5f6208feed
29 changed files with 92385 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

39
README.md Normal file
View File

@@ -0,0 +1,39 @@
---
license: apache-2.0
language:
- en
pipeline_tag: text-generation
inference: false
tags:
- dare
- super mario merge
- pytorch
- mixtral
- merge
---
# mixtral megamerge 8x7b v2
The following models were merged with DARE using [https://github.com/martyn/safetensors-merge-supermario](https://github.com/martyn/safetensors-merge-supermario)
## Mergelist
```
mistralai/Mixtral-8x7B-v0.1
mistralai/Mixtral-8x7B-Instruct-v0.1
cognitivecomputations/dolphin-2.6-mixtral-8x7b
Brillibits/Instruct_Mixtral-8x7B-v0.1_Dolly15K
orangetin/OpenHermes-Mixtral-8x7B
NeverSleep/Noromaid-v0.1-mixtral-8x7b-v3
```
## Merge command
```
python3 hf_merge.py to_merge_mixtral2.txt mixtral-2 -p 0.15 -lambda 1.95
```
### Notes
* MoE gates were filtered for compatibility then averaged with `(tensor1 + tensor2)/2`
* Seems to generalize across prompting formats and sampling settings

30
config.json Normal file
View File

@@ -0,0 +1,30 @@
{
"_name_or_path": "/workspace/models/Mixtral-8x7B-v0.1",
"architectures": [
"MixtralForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 32000,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 32768,
"model_type": "mixtral",
"num_attention_heads": 32,
"num_experts_per_tok": 2,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"num_local_experts": 8,
"output_router_logits": true,
"rms_norm_eps": 1e-05,
"rope_theta": 1000000.0,
"router_aux_loss_coef": 0.02,
"sliding_window": null,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.37.0.dev0",
"use_cache": false,
"vocab_size": 32002
}

6
generation_config.json Normal file
View File

@@ -0,0 +1,6 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"transformers_version": "4.36.0.dev0"
}

6
mergelist.txt Normal file
View File

@@ -0,0 +1,6 @@
mistralai/Mixtral-8x7B-v0.1
mistralai/Mixtral-8x7B-Instruct-v0.1
cognitivecomputations/dolphin-2.6-mixtral-8x7b
Brillibits/Instruct_Mixtral-8x7B-v0.1_Dolly15K
orangetin/OpenHermes-Mixtral-8x7B
NeverSleep/Noromaid-v0.1-mixtral-8x7b-v3

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c0dfbf311572b30b01615a07cbf39ec8774dc2f16010c348f809978ad66f4be8
size 4892825968

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fdbad6e66d2738e4ae609ce55ca29d13507dfd680d2fbe43bd52a4ea8cfb6ff9
size 4983004016

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:977003508efb2a265d942882e37137d9e9fbd68b08f8d6b138f6a4afa70c99ec
size 4983004016

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:49ad786d36ebeb9acbfc626946fb162163ef2eaacbd350c7c87b972fe05f23ca
size 4899035200

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b97d8ac6f3206afaa693ca9d4b7cb8042d14b4a1221cac2664f4fb69f86e25d6
size 4983004016

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0bbc60737c1f16739d81a02b48cad29f346a13a31fa6999e9993071f56befd1f
size 4983004016

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8597ff157700bb16cd0cabaee0190cb99504e8aa5c798f8652b5bd3cf061768a
size 4899035248

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e215e26eb54fbc0a219779675453e7e5fbdeb437fc32602b3ac48ead5a374228
size 4983004072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e310742fa0b75372575c90d5d5069da316e39816a96ec58fc07cf42a3be2d4f8
size 4983004072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2e9daf46fb3d72b6a7f87ad3ef7897fd3b925271bc3319545e8ade20c391dc38
size 4899035248

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:711193b1fcb0d337a310a07fe2a6df2ea724cad75195866cee4c5f266890be28
size 4983004072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:24c168f48c07853526f6dd8af0ab51d2b13cce5e402d821682caaeca5ccaf274
size 4983004072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7f44556473cd376827bffa529de0bc787368b0498a7e2ff59350145285a96a87
size 4983004072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:490957d2945399f43ed92ea791301a38d15240fcace6c4659d743802559363e4
size 4899035248

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e9180ab0dd77add0d8ccdc1e92b9b719c1907056708f0ecc642134f317fec4f1
size 4983004072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e7a30ea0e19ac5f9e36b4f410446910e7cb49c063f82f9cb198420a7559e31d4
size 4983004072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:821bbe6c0e5b73291ee864f311d09dd22a48cf3ad0d0f53e2d02d58388f15887
size 4899035248

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:40edce4bd5a5e6c80f2499fc8f22fe2035c452b6a0c0d188bf0d665dfaf35e07
size 4983004072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7a3c297a086cbb73e094bf3fb13ec4079d25ce8ac99e1c3166634bccb76c1d7d
size 4221695472

1002
model.safetensors.index.json Normal file

File diff suppressed because it is too large Load Diff

24
special_tokens_map.json Normal file
View File

@@ -0,0 +1,24 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": "</s>",
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

91122
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

BIN
tokenizer.model (Stored with Git LFS) Normal file

Binary file not shown.

61
tokenizer_config.json Normal file
View File

@@ -0,0 +1,61 @@
{
"add_bos_token": true,
"add_eos_token": false,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32000": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32001": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"legacy": true,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "</s>",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"trust_remote_code": true,
"unk_token": "<unk>",
"use_default_system_prompt": false,
"use_fast": true
}