commit 221b532b55abf8075678247c02f45595aba2c686 Author: ModelHub XC Date: Tue Jun 16 01:40:12 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: teknium/OpenHermes-13B Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..43f2fc4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,51 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text + +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs 
merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +pytorch_model.bin filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +tokenizer.model filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..635b065 --- /dev/null +++ b/README.md @@ -0,0 +1,158 @@ +--- +base_model: NousResearch/Llama-2-13b-hf +tags: +- llama-2 +- instruct +- finetune +- alpaca +- gpt4 +- synthetic data +- distillation +datasets: +- teknium/openhermes +model-index: +- name: openhermes-13b + results: [] +license: mit +language: +- en +--- + +# OpenHermes-13B + +![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/ovkrkIIUwJ9azhPtW6dAb.png) + +## Model description + +OpenHermes 13B is the first fine tune of the Hermes dataset that has a fully open source dataset! + +OpenHermes was trained on 242,000 entries of primarily GPT-4 generated data, from open datasets across the AI landscape, including: + +- GPTeacher - General Instruct, Roleplay v1, Roleplay v2, and Code Instruct Datasets, by Teknium +- WizardLM (v1, evol_instruct 70k), by WizardLM Team/nlpxucan +- Airoboros GPT-4 (v1.0), by JonDurbin +- Camel-AI's domain expert datasets, by the Camel-AI Team +- CodeAlpaca, by Sahil2801 +- GPT4-LLM and Unnatural Instructions, by Microsoft + +Filtering included removal of OpenAI refusals, disclaimers, and "As an AI" type examples and more + +The base dataset mix the model was trained on is identical to Nous-Hermes', minus the Nous-Instruct and PDACTL datasets which were private datasets. 
+ +The WANDB Project is public and can be examined at this link: https://wandb.ai/teknium1/openhermes/runs/openhermes-v2-fullft-13b + +Huge thank you to [main_horse](https://twitter.com/main_horse) for compute access and a16z for sponsoring my work, and all the dataset creators and other people whose work has contributed to this project! + +## Example Outputs + +![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/wMSeFqUSBwCNefm7s6G1-.png) + +![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/00iVenvEOMWIO9X6EY2EZ.png) + +![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/o7hHbCbtwMLitDy-FWDAg.png) + +![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/-9ZT1FBSE2BJhDowoh6Gj.png) + +## Benchmark Information + +## Benchmark Results + +GPT-4All Benchmark Set +``` +| Task |Version| Metric |Value | |Stderr| +|-------------|------:|--------|-----:|---|-----:| +|arc_challenge| 0|acc |0.5009|± |0.0146| +| | |acc_norm|0.5247|± |0.0146| +|arc_easy | 0|acc |0.8127|± |0.0080| +| | |acc_norm|0.7854|± |0.0084| +|boolq | 1|acc |0.8153|± |0.0068| +|hellaswag | 0|acc |0.6126|± |0.0049| +| | |acc_norm|0.7995|± |0.0040| +|openbookqa | 0|acc |0.3660|± |0.0216| +| | |acc_norm|0.4600|± |0.0223| +|piqa | 0|acc |0.7922|± |0.0095| +| | |acc_norm|0.8112|± |0.0091| +|winogrande | 0|acc |0.7293|± |0.0125| +Average: 0.7036 +``` + +AGI-Eval +``` +| Task |Version| Metric |Value | |Stderr| +|------------------------------|------:|--------|-----:|---|-----:| +|agieval_aqua_rat | 0|acc |0.2008|± |0.0252| +| | |acc_norm|0.2126|± |0.0257| +|agieval_logiqa_en | 0|acc |0.3410|± |0.0186| +| | |acc_norm|0.3564|± |0.0188| +|agieval_lsat_ar | 0|acc |0.2261|± |0.0276| +| | |acc_norm|0.2174|± |0.0273| +|agieval_lsat_lr | 0|acc |0.3725|± |0.0214| +| | |acc_norm|0.3373|± |0.0210| +|agieval_lsat_rc | 0|acc |0.4684|± |0.0305| +| | |acc_norm|0.4572|± 
|0.0304| +|agieval_sat_en | 0|acc |0.6553|± |0.0332| +| | |acc_norm|0.5971|± |0.0343| +|agieval_sat_en_without_passage| 0|acc |0.4515|± |0.0348| +| | |acc_norm|0.4029|± |0.0343| +|agieval_sat_math | 0|acc |0.3273|± |0.0317| +| | |acc_norm|0.2636|± |0.0298| +Average: 0.3556 +``` +BigBench Reasoning Test +``` +| Task |Version| Metric |Value | |Stderr| +|------------------------------------------------|------:|---------------------|-----:|---|-----:| +|bigbench_causal_judgement | 0|multiple_choice_grade|0.5368|± |0.0363| +|bigbench_date_understanding | 0|multiple_choice_grade|0.7127|± |0.0236| +|bigbench_disambiguation_qa | 0|multiple_choice_grade|0.3023|± |0.0286| +|bigbench_geometric_shapes | 0|multiple_choice_grade|0.1003|± |0.0159| +| | |exact_str_match |0.0000|± |0.0000| +|bigbench_logical_deduction_five_objects | 0|multiple_choice_grade|0.2720|± |0.0199| +|bigbench_logical_deduction_seven_objects | 0|multiple_choice_grade|0.1986|± |0.0151| +|bigbench_logical_deduction_three_objects | 0|multiple_choice_grade|0.4500|± |0.0288| +|bigbench_movie_recommendation | 0|multiple_choice_grade|0.2880|± |0.0203| +|bigbench_navigate | 0|multiple_choice_grade|0.5000|± |0.0158| +|bigbench_reasoning_about_colored_objects | 0|multiple_choice_grade|0.5390|± |0.0111| +|bigbench_ruin_names | 0|multiple_choice_grade|0.3906|± |0.0231| +|bigbench_salient_translation_error_detection | 0|multiple_choice_grade|0.1844|± |0.0123| +|bigbench_snarks | 0|multiple_choice_grade|0.5249|± |0.0372| +|bigbench_sports_understanding | 0|multiple_choice_grade|0.5335|± |0.0159| +|bigbench_temporal_sequences | 0|multiple_choice_grade|0.2980|± |0.0145| +|bigbench_tracking_shuffled_objects_five_objects | 0|multiple_choice_grade|0.2048|± |0.0114| +|bigbench_tracking_shuffled_objects_seven_objects| 0|multiple_choice_grade|0.1297|± |0.0080| +|bigbench_tracking_shuffled_objects_three_objects| 0|multiple_choice_grade|0.4500|± |0.0288| +Average: 36.75 +``` + +This is a slight improvement on GPT4ALL Suite and 
BigBench Suite, with a degradation in AGIEval compared to the original Hermes. + +Average Score Comparison between Nous-Hermes Llama-2 and OpenHermes Llama-2: +``` +| Bench | Nous-Hermes | OpenHermes | Change | +|------------------------------|------------:|------------|--------| +|GPT4All | 70.00| 70.36| +0.36| +|------------------------------------------------------------------| +|BigBench | 36.57| 36.75| +0.18| +|------------------------------------------------------------------| +|AGI Eval | 37.20| 35.56| -1.64| +``` + +## Training procedure + + +![image/png](https://cdn-uploads.huggingface.co/production/uploads/6317aade83d8d2fd903192d9/pqQ6MrMVy80hHEKSfqIX2.png) + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 2 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 128 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 300 +- num_epochs: 3 \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..786b8ba --- /dev/null +++ b/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "NousResearch/Llama-2-13b-hf", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 13824, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 40, + "num_hidden_layers": 40, + "num_key_value_heads": 40, + "pad_token_id": 0, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.34.0.dev0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 
--- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..5b424a7 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67320bd2e2ee0646b60d26e30f4e20cc0a04f74d5bc19f05cc09ca0365eb9ca9 +size 26031913113 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..09a41a3 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..65532c2 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,38 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "use_default_system_prompt": true, + "use_fast": true +} diff --git a/training_args.bin 
b/training_args.bin new file mode 100644 index 0000000..61e0755 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af7fd1410a5509bda30cd61281d08d72cce99c218e8dd3fcdb6a4331045ec45a +size 4667