commit 8c0cb1fedd545508eb660656191274e76f300c5b Author: ModelHub XC Date: Tue Jun 9 12:26:12 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: RedHatAI/SparseLLama-2-7b-ultrachat_200k-pruned_50.2of4 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..78a3db9 --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +--- +datasets: +- HuggingFaceH4/ultrachat_200k +language: +- en +pipeline_tag: text-generation +--- + +# SparseLlama-2-7b-ultrachat_200k-pruned_50.2of4 + +## Model Overview +- **Model Architecture:** Llama-2 + - **Input:** Text + - **Output:** Text +- **Model Optimizations:** + - **Pruned:** 50% 2:4 +- **Release Date:** 6/28/2024 +- **Version:** 1.0 +- **Model Developers:** Neural Magic + +Compressed version of [Llama-2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf) specialized for text-generation. +This model was obtained by fine-tuning the Sparse Foundational model [Sparse-Llama-2-7b-pruned_50.2of4](https://huggingface.co/nm-testing/SparseLlama-2-7b-pruned_50.2of4) on the [ultrachat_200k](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k) dataset. +It achieves a win rate of 62.1% on the [AlpacaEval](https://github.com/tatsu-lab/alpaca_eval) benchmark (version 1.0) when using [Llama-2-70b-chat](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) as evaluator, whereas the dense [Llama-2-7b-ultrachat200k](https://huggingface.co/neuralmagic/Llama-2-7b-ultrachat200k) model achieves 57.6% win rate. + +This model was produced as part if Neural Magic's Sparse Foundational Models initiative, and demostrates the capability of Sparse Foundational Models to transfer to the text-generation domain. + +**Note:** This model uses the chat template from [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta). + +## Model Optimizations + +This model is derived from the Sparse Foundational model [Sparse-Llama-2-7b-pruned_50.2of4](https://huggingface.co/nm-testing/SparseLlama-2-7b-pruned_50.2of4), which was obtained by applying the [SparseGPT](https://arxiv.org/abs/2301.00774) algorithm to prune [Llama-2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf) to 50% sparsity with a 2:4 mask. +This optimization reduces the number of parameters by 50%, reducing the disk size and FLOPs by the same level. + +## Evaluation + +This model was evaluated in the [AlpacaEval](https://github.com/tatsu-lab/alpaca_eval) benchmark using [Llama-2-70b-chat](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) as evaluator. + +## Accuracy + +| Model | Win rate | Recovery | +| :----- | :--------: | :--------: | +| [Llama-2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf) | 3.7% | -- | +| [Llama-2-7b-ultrachat200k](https://huggingface.co/neuralmagic/Llama-2-7b-ultrachat200k) | 57.6% | -- | +| SparseLlama-2-7b-ultrachat_200k-pruned_50.2of4 | 62.1% | 108% | \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..af8d805 --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ +{ + "_name_or_path": "training", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.40.2", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..589fbca --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.40.2" +} diff --git a/model-00001-of-00003.safetensors b/model-00001-of-00003.safetensors new file mode 100644 index 0000000..3d0fb9d --- /dev/null +++ b/model-00001-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab3c867f8396a080ba3bea4fe9bb1408be7ee835b3bd6cc7c396c30a036cf27f +size 4938985352 diff --git a/model-00002-of-00003.safetensors b/model-00002-of-00003.safetensors new file mode 100644 index 0000000..4196f2c --- /dev/null +++ b/model-00002-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27a71b0ba805f3ddb3c3bc490a84e5e958adf746ad764a582ce1bcf9faa4e823 +size 4947390880 diff --git a/model-00003-of-00003.safetensors b/model-00003-of-00003.safetensors new file mode 100644 index 0000000..fdc3e77 --- /dev/null +++ b/model-00003-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e356c3b2c877b236cf445b20c3d733186a5ed227c50f9401702dedc50e557482 +size 3590488816 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..b3d8d7d --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091fdb60b8ee3713df38fd6f9849c3aa82d7eeeb5f6d224a77139543b1db8652 +size 23950 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..99b68f6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..5d592ae --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf467c9e0f536bda271283c6ef85eb1a943e3196b621c8a912d64953b205df83 +size 1842795 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..88d00be --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a5374b4ffd718e138a7baf2a3e38847444e6105749869c95c059bc5ce5cfab +size 1438