初始化项目,由ModelHub XC社区提供模型

Model: nlpguy/Hermes-low-tune-3.1
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-06 18:24:21 +08:00
commit 1110f7e76d
13 changed files with 91539 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

33
LICENSE Normal file
View File

@@ -0,0 +1,33 @@
Copyright 2024 Pseudonym: nlpguy
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Furthermore, the following terms and conditions apply:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the “Software”), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

181
README.md Normal file
View File

@@ -0,0 +1,181 @@
---
tags:
- mergekit
- merge
base_model:
- teknium/OpenHermes-2.5-Mistral-7B
- nlpguy/Hermes-low-tune-2
- charlesdedampierre/TopicNeuralHermes-2.5-Mistral-7B
- openaccess-ai-collective/openhermes-2_5-dpo-no-robots
- flemmingmiguel/Mistrality-7B
- beowolx/MistralHermes-CodePro-7B-v1
license: other
license_name: apache-2.0-mit-dual-license
license_link: https://huggingface.co/nlpguy/Hermes-low-tune-3.1/raw/main/LICENSE
model-index:
- name: Hermes-low-tune-3.1
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: AI2 Reasoning Challenge (25-Shot)
      type: ai2_arc
      config: ARC-Challenge
      split: test
      args:
        num_few_shot: 25
    metrics:
    - type: acc_norm
      value: 65.44
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=nlpguy/Hermes-low-tune-3.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: HellaSwag (10-Shot)
      type: hellaswag
      split: validation
      args:
        num_few_shot: 10
    metrics:
    - type: acc_norm
      value: 84.6
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=nlpguy/Hermes-low-tune-3.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU (5-Shot)
      type: cais/mmlu
      config: all
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 64.13
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=nlpguy/Hermes-low-tune-3.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: TruthfulQA (0-shot)
      type: truthful_qa
      config: multiple_choice
      split: validation
      args:
        num_few_shot: 0
    metrics:
    - type: mc2
      value: 53.59
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=nlpguy/Hermes-low-tune-3.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Winogrande (5-shot)
      type: winogrande
      config: winogrande_xl
      split: validation
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 78.61
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=nlpguy/Hermes-low-tune-3.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GSM8k (5-shot)
      type: gsm8k
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 63.46
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=nlpguy/Hermes-low-tune-3.1
      name: Open LLM Leaderboard
---
# merged
This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
## Merge Details
### Merge Method
This model was merged with the [task arithmetic](https://arxiv.org/abs/2212.04089) merge method, using [teknium/OpenHermes-2.5-Mistral-7B](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B) as the base model.
### Models Merged
The following models were included in the merge:
* [nlpguy/Hermes-low-tune-2](https://huggingface.co/nlpguy/Hermes-low-tune-2)
* [charlesdedampierre/TopicNeuralHermes-2.5-Mistral-7B](https://huggingface.co/charlesdedampierre/TopicNeuralHermes-2.5-Mistral-7B)
* [openaccess-ai-collective/openhermes-2_5-dpo-no-robots](https://huggingface.co/openaccess-ai-collective/openhermes-2_5-dpo-no-robots)
* [flemmingmiguel/Mistrality-7B](https://huggingface.co/flemmingmiguel/Mistrality-7B)
* [beowolx/MistralHermes-CodePro-7B-v1](https://huggingface.co/beowolx/MistralHermes-CodePro-7B-v1)
### Configuration
The following YAML configuration was used to produce this model:
```yaml
base_model: teknium/OpenHermes-2.5-Mistral-7B
dtype: bfloat16
merge_method: task_arithmetic
slices:
- sources:
  - layer_range: [0, 32]
    model: teknium/OpenHermes-2.5-Mistral-7B
  - layer_range: [0, 32]
    model: nlpguy/Hermes-low-tune-2
    parameters:
      weight: 0.2
  - layer_range: [0, 32]
    model: beowolx/MistralHermes-CodePro-7B-v1
    parameters:
      weight: 0.2
  - layer_range: [0, 32]
    model: flemmingmiguel/Mistrality-7B
    parameters:
      weight: 0.2
  - layer_range: [0, 32]
    model: charlesdedampierre/TopicNeuralHermes-2.5-Mistral-7B
    parameters:
      weight: 0.2
  - layer_range: [0, 32]
    model: openaccess-ai-collective/openhermes-2_5-dpo-no-robots
    parameters:
      weight: 0.2
```
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_nlpguy__Hermes-low-tune-3.1).

| Metric |Value|
|---------------------------------|----:|
|Avg. |68.31|
|AI2 Reasoning Challenge (25-Shot)|65.44|
|HellaSwag (10-Shot) |84.60|
|MMLU (5-Shot) |64.13|
|TruthfulQA (0-shot) |53.59|
|Winogrande (5-shot) |78.61|
|GSM8k (5-shot) |63.46|

4
added_tokens.json Normal file
View File

@@ -0,0 +1,4 @@
{
"<|im_end|>": 32000,
"<|im_start|>": 32001
}

25
config.json Normal file
View File

@@ -0,0 +1,25 @@
{
"_name_or_path": "teknium/OpenHermes-2.5-Mistral-7B",
"architectures": [
"MistralForCausalLM"
],
"bos_token_id": 1,
"eos_token_id": 32000,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 32768,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"rms_norm_eps": 1e-05,
"rope_theta": 10000.0,
"sliding_window": 4096,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.35.2",
"use_cache": false,
"vocab_size": 32002
}

27
mergekit_config.yml Normal file
View File

@@ -0,0 +1,27 @@
base_model: teknium/OpenHermes-2.5-Mistral-7B
dtype: bfloat16
merge_method: task_arithmetic
slices:
- sources:
  - layer_range: [0, 32]
    model: teknium/OpenHermes-2.5-Mistral-7B
  - layer_range: [0, 32]
    model: nlpguy/Hermes-low-tune-2
    parameters:
      weight: 0.2
  - layer_range: [0, 32]
    model: beowolx/MistralHermes-CodePro-7B-v1
    parameters:
      weight: 0.2
  - layer_range: [0, 32]
    model: flemmingmiguel/Mistrality-7B
    parameters:
      weight: 0.2
  - layer_range: [0, 32]
    model: charlesdedampierre/TopicNeuralHermes-2.5-Mistral-7B
    parameters:
      weight: 0.2
  - layer_range: [0, 32]
    model: openaccess-ai-collective/openhermes-2_5-dpo-no-robots
    parameters:
      weight: 0.2

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1531b167a823ac44552d41600312bbeb2aab78e82878314b3a823544a125f02a
size 9783613704

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:30fe8e9f5f88f752ed90e67ce5fffe7ce1529b83075a27f86e58c8e95682380d
size 4699917104

File diff suppressed because one or more lines are too long

23
special_tokens_map.json Normal file
View File

@@ -0,0 +1,23 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

91140
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

BIN
tokenizer.model (Stored with Git LFS) Normal file

Binary file not shown.

61
tokenizer_config.json Normal file
View File

@@ -0,0 +1,61 @@
{
"add_bos_token": true,
"add_eos_token": false,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32000": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32001": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"legacy": true,
"model_max_length": 1000000000000000019884624838656,
"pad_token": null,
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"trust_remote_code": false,
"unk_token": "<unk>",
"use_default_system_prompt": true,
"use_fast": true
}