commit 5bbd71356dc7654ee09d40fc41c1cca5a6e4b64b Author: ModelHub XC Date: Thu May 7 10:45:34 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: dphn/dolphin-2.8-mistral-7b-v02 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..b916610 --- /dev/null +++ b/README.md 
@@ -0,0 +1,257 @@ +--- +base_model: alpindale/Mistral-7B-v0.2-hf +language: +- en +license: apache-2.0 +datasets: +- cognitivecomputations/dolphin +- cognitivecomputations/dolphin-coder +- cognitivecomputations/samantha-data +- jondurbin/airoboros-2.2.1 +- teknium/openhermes-2.5 +- m-a-p/Code-Feedback +- m-a-p/CodeFeedback-Filtered-Instruction +model-index: +- name: dolphin-2.8-mistral-7b-v02 + results: + - task: + type: text-generation + dataset: + type: openai_humaneval + name: HumanEval + metrics: + - name: pass@1 + type: pass@1 + value: 0.469 + verified: false +--- + +# Dolphin 2.8 Mistral 7b v0.2 🐬 + +By Eric Hartford and Cognitive Computations + +[![Discord](https://img.shields.io/discord/1156064224225808488?logo=Discord&logoColor=%23ffffff&label=Discord&link=https%3A%2F%2Fdiscord.gg%2FtCMkMDDHwm)](https://discord.gg/cognitivecomputations) +Discord: https://discord.gg/cognitivecomputations + + + +My appreciation for the sponsors of Dolphin 2.8: +- [Crusoe Cloud](https://crusoe.ai/) - provided an excellent on-demand 10xL40S node +- [Winston Sou](https://twitter.com/WinsonDabbles) - Along with a generous anonymous sponsor, donated a massive personally owned compute resource! +- [Abacus AI](https://abacus.ai/) - my employer and partner in many things. + +This model is based on [Mistral-7b-v0.2](https://huggingface.co/alpindale/Mistral-7B-v0.2-hf), a new base model that MistralAI released on March 23, 2024 but has not yet published on HuggingFace. Thanks to @alpindale for converting / publishing. + +The base model has 32k context, and the full-weights fine-tune used a 16k sequence length. + +Training took 3 days on 10x L40S provided by [Crusoe Cloud](https://crusoe.ai/). + +Dolphin-2.8 has a variety of instruction, conversational, and coding skills. + +Dolphin is uncensored. I have filtered the dataset to remove alignment and bias. This makes the model more compliant. You are advised to implement your own alignment layer before exposing the model as a service. 
It will be highly compliant to any requests, even unethical ones. Please read my blog post about uncensored models. https://erichartford.com/uncensored-models You are responsible for any content you create using this model. Enjoy responsibly. + +Dolphin is licensed Apache 2.0. I grant permission for any use including commercial. Dolphin was trained on data generated from GPT4 among other models. + +# Evals + +``` +{ + "arc_challenge": { + "acc,none": 0.5921501706484642, + "acc_stderr,none": 0.014361097288449701, + "acc_norm,none": 0.6339590443686007, + "acc_norm_stderr,none": 0.014077223108470139 + }, + "gsm8k": { + "exact_match,strict-match": 0.4783927217589083, + "exact_match_stderr,strict-match": 0.013759618667051773, + "exact_match,flexible-extract": 0.5367702805155421, + "exact_match_stderr,flexible-extract": 0.013735191956468648 + }, + "hellaswag": { + "acc,none": 0.6389165504879506, + "acc_stderr,none": 0.004793330525656218, + "acc_norm,none": 0.8338976299541924, + "acc_norm_stderr,none": 0.00371411888431746 + }, + "mmlu": { + "acc,none": 0.6122347243982339, + "acc_stderr,none": 0.003893774654142997 + }, + "truthfulqa_mc2": { + "acc,none": 0.5189872652778472, + "acc_stderr,none": 0.014901128316426086 + }, + "winogrande": { + "acc,none": 0.7971586424625099, + "acc_stderr,none": 0.011301439925936643 + } +} +``` + +[Built with Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) +<details><summary>See axolotl config</summary>
+ +axolotl version: `0.4.0` +```yaml + +base_model: alpindale/Mistral-7B-v0.2-hf +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer +is_mistral_derived_model: true + +load_in_8bit: false +load_in_4bit: false +strict: false + +datasets: + - path: /workspace/datasets/dolphin201-sharegpt2.jsonl + type: sharegpt + - path: /workspace/datasets/dolphin-coder-translate-sharegpt2.jsonl + type: sharegpt + - path: /workspace/datasets/dolphin-coder-codegen-sharegpt2.jsonl + type: sharegpt + - path: /workspace/datasets/m-a-p_Code-Feedback-sharegpt.jsonl + type: sharegpt + - path: /workspace/datasets/m-a-p_CodeFeedback-Filtered-Instruction-sharegpt.jsonl + type: sharegpt + - path: /workspace/datasets/not_samantha_norefusals.jsonl + type: sharegpt + - path: /workspace/datasets/openhermes2_5-sharegpt.jsonl + type: sharegpt + +chat_template: chatml + +dataset_prepared_path: last_run_prepared +val_set_size: 0.001 +output_dir: /workspace/dolphin-2.8-mistral-7b + +sequence_len: 16384 +sample_packing: true +pad_to_sequence_len: true + +wandb_project: dolphin +wandb_entity: +wandb_watch: +wandb_run_id: +wandb_log_model: + +gradient_accumulation_steps: 8 +micro_batch_size: 3 +num_epochs: 4 +adam_beta2: 0.95 +adam_epsilon: 0.00001 +max_grad_norm: 1.0 +lr_scheduler: cosine +learning_rate: 0.000005 +optimizer: adamw_bnb_8bit + +train_on_inputs: false +group_by_length: false +bf16: true +fp16: false +tf32: false + +gradient_checkpointing: true +gradient_checkpointing_kwargs: + use_reentrant: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 + +eval_steps: 73 +eval_table_size: +eval_table_max_new_tokens: +eval_sample_packing: false +saves_per_epoch: +save_steps: 73 +save_total_limit: 2 +debug: +deepspeed: deepspeed_configs/zero3_bf16.json +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: + eos_token: "<|im_end|>" +tokens: + - "<|im_start|>" + +``` + +</details><br>
+ +# workspace/dolphin-2.8-mistral-7b + +This model is a fine-tuned version of [alpindale/Mistral-7B-v0.2-hf](https://huggingface.co/alpindale/Mistral-7B-v0.2-hf) on the datasets listed in the axolotl config above. +It achieves the following results on the evaluation set: +- Loss: 0.4828 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-06 +- train_batch_size: 3 +- eval_batch_size: 3 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 10 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 240 +- total_eval_batch_size: 30 +- optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-05 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 10 +- num_epochs: 4 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:-----:|:----:|:---------------:| +| 1.1736 | 0.0 | 1 | 1.0338 | +| 0.6106 | 0.36 | 73 | 0.5439 | +| 0.5766 | 0.72 | 146 | 0.5171 | +| 0.5395 | 1.06 | 219 | 0.5045 | +| 0.5218 | 1.42 | 292 | 0.4976 | +| 0.5336 | 1.78 | 365 | 0.4915 | +| 0.5018 | 2.13 | 438 | 0.4885 | +| 0.5113 | 2.48 | 511 | 0.4856 | +| 0.5066 | 2.84 | 584 | 0.4838 | +| 0.4967 | 3.19 | 657 | 0.4834 | +| 0.4956 | 3.55 | 730 | 0.4830 | +| 0.5026 | 3.9 | 803 | 0.4828 | + + +### Framework versions + +- Transformers 4.40.0.dev0 +- Pytorch 2.2.1+cu121 +- Datasets 2.18.0 +- Tokenizers 0.15.0 + + +# Quants + +- [dagbs/GGUF](https://huggingface.co/dagbs/dolphin-2.8-mistral-7b-v02-GGUF) + +- [bartowski/ExLlamaV2](https://huggingface.co/bartowski/dolphin-2.8-mistral-7b-v02-exl2) + +- [solidrust/AWQ](https://huggingface.co/solidrust/dolphin-2.8-mistral-7b-v02-AWQ) \ No newline at end of file diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..e36863d --- /dev/null +++ 
b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "<|im_end|>": 32000, + "<|im_start|>": 32001 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000..3d169e9 --- /dev/null +++ b/config.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "alpindale/Mistral-7B-v0.2-hf", + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 32000, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mistral", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.40.0.dev0", + "use_cache": false, + "vocab_size": 32002 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/eval.sh b/eval.sh new file mode 100644 index 0000000..f50182f --- /dev/null +++ b/eval.sh @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9d6750fd8cd592c75431e0431588d1b6ff6672cfc3a37920ac013b80a7495a +size 1545 diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..36fc4c3 --- /dev/null +++ b/eval_results.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca6951f59b68c9c0baf1365378b4abcfdf5dd741be394c8ef07aca1786886d35 +size 138509 diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..16dd90a --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "do_sample": true, + "eos_token_id": 2, + "transformers_version": "4.40.0.dev0" +} diff --git 
a/model-00001-of-00003.safetensors b/model-00001-of-00003.safetensors new file mode 100644 index 0000000..469f225 --- /dev/null +++ b/model-00001-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cacee5b1690b6c100a51239811b0ea886a579e6363eed27bbc0c7e9bb897fba8 +size 4943178720 diff --git a/model-00002-of-00003.safetensors b/model-00002-of-00003.safetensors new file mode 100644 index 0000000..c60c2fb --- /dev/null +++ b/model-00002-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7432647a6747c2388f83943740c8810bf55fcfbba368bf0cd9d2cd43caba795 +size 4999819336 diff --git a/model-00003-of-00003.safetensors b/model-00003-of-00003.safetensors new file mode 100644 index 0000000..8d0f76b --- /dev/null +++ b/model-00003-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb2fb5e1af1963398eff384c1ea89273d34f4842864563be8ec3bb693ab0d2d +size 4540532728 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..742c7cb --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88294071635bab4dc0ebbe97ba11a9895ea161df1a1be955a4406f730650df2d +size 23950 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..40b1c6d --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..8b443ef --- /dev/null +++ b/tokenizer.model 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..65a1c10 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20375580fbcae84f76d814d1b8e98a84f688a0936be6a6ba8dce93285474233c +size 1675