From d27239b8e34b9ec93f35f33f5e4ba765a33be590 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Thu, 23 Apr 2026 09:47:58 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: RedHatAI/Llama-2-7b-ultrachat200k-pruned_50 Source: Original Platform --- .gitattributes | 35 ++++++++++++++ README.md | 71 +++++++++++++++++++++++++++++ arc_challenge_25shot_bs16_bf16.json | 25 ++++++++++ config.json | 29 ++++++++++++ configuration.json | 1 + generation_config.json | 6 +++ gsm8k_5shot_bs16_bf16.json | 23 ++++++++++ hellaswag_10shot_bs16_bf16.json | 25 ++++++++++ mmlu_5shot_bs4_bf16.json | 3 ++ model-00001-of-00003.safetensors | 3 ++ model-00002-of-00003.safetensors | 3 ++ model-00003-of-00003.safetensors | 3 ++ model.safetensors.index.json | 3 ++ special_tokens_map.json | 23 ++++++++++ tokenizer.json | 3 ++ tokenizer.model | 3 ++ tokenizer_config.json | 3 ++ truthfulqa_mc_0shot_bs16_bf16.json | 25 ++++++++++ winogrande_5shot_bs16_bf16.json | 23 ++++++++++ 19 files changed, 310 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 arc_challenge_25shot_bs16_bf16.json create mode 100644 config.json create mode 100644 configuration.json create mode 100644 generation_config.json create mode 100644 gsm8k_5shot_bs16_bf16.json create mode 100644 hellaswag_10shot_bs16_bf16.json create mode 100644 mmlu_5shot_bs4_bf16.json create mode 100644 model-00001-of-00003.safetensors create mode 100644 model-00002-of-00003.safetensors create mode 100644 model-00003-of-00003.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json create mode 100644 
truthfulqa_mc_0shot_bs16_bf16.json create mode 100644 winogrande_5shot_bs16_bf16.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..ee830f3 --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +--- +base_model: neuralmagic/Llama-2-7b-pruned50-retrained +inference: true +model_type: 
llama +pipeline_tag: text-generation +datasets: + - cerebras/SlimPajama-627B + - HuggingFaceH4/ultrachat_200k +tags: +- sparse +- chat +--- + +# Llama-2-7b-pruned50-retrained-ultrachat + +This repo contains a [50% sparse Llama 2 7B](https://huggingface.co/neuralmagic/Llama-2-7b-pruned50-retrained) finetuned for chat tasks using the [UltraChat 200k](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k) dataset. + +Official model weights from [Enabling High-Sparsity Foundational Llama Models with Efficient Pretraining and Deployment](https://arxiv.org/abs/2405.03594). + +**Authors**: Neural Magic, Cerebras + +## Usage + +Below we share some code snippets on how to get quickly started with running the model. + +### Sparse Transfer + +By leveraging a pre-sparsified model's structure, you can efficiently fine-tune on new data, leading to reduced hyperparameter tuning, training times, and computational costs. Learn about this process [here](https://neuralmagic.github.io/docs-v2/get-started/transfer). + +### Running the model + +This model may be run with the transformers library. For accelerated inference with sparsity, deploy with [nm-vllm](https://github.com/neuralmagic/nm-vllm) or [deepsparse](https://github.com/neuralmagic/deepsparse). + +```python +# pip install transformers accelerate +from transformers import AutoTokenizer, AutoModelForCausalLM + +tokenizer = AutoTokenizer.from_pretrained("neuralmagic/Llama-2-7b-pruned50-retrained-ultrachat") +model = AutoModelForCausalLM.from_pretrained("neuralmagic/Llama-2-7b-pruned50-retrained-ultrachat", device_map="auto") + +input_text = "Write me a poem about Machine Learning." +input_ids = tokenizer.apply_chat_template(input_text, add_generation_prompt=True, return_tensors="pt").to("cuda") + +outputs = model.generate(**input_ids) +print(tokenizer.decode(outputs[0])) +``` + +## Evaluation Benchmark Results + +Model evaluation metrics and results. 
+ +| Benchmark | Metric | Llama-2-7b-ultrachat | Llama-2-7b-pruned50-retrained-ultrachat | +|------------------------------------------------|---------------|-------------|-------------------------------| +| [MMLU](https://arxiv.org/abs/2009.03300) | 5-shot | 46.1% | 41.4% | +| [HellaSwag](https://arxiv.org/abs/1905.07830) | 10-shot | 75.9% | 73.5% | +| [WinoGrande](https://arxiv.org/abs/1907.10641) | 5-shot | 72.6% | 67.8% | +| [ARC-c](https://arxiv.org/abs/1911.01547) | 25-shot | 52.8% | 49.0% | +| [TruthfulQA](https://arxiv.org/abs/2109.07958) | 0-shot | 44.8% | 39.5% | +| [GSM8K](https://arxiv.org/abs/2110.14168) | 5-shot | 12.4% | 8.0% | +| [AlpacaEval](https://arxiv.org/abs/2107.03374) ([Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) evaluator) | Win rate | 57.6% | 60.1% | +| [AlpacaEval](https://arxiv.org/abs/2107.03374) (GPT-4 Turbo evaluator) | Win rate | 60.6% | 59.0% | + + +## Model Training Details + +This model was obtained by sparse-transfer of the sparse foundational model [Llama-2-7b-pruned50-retrained](https://huggingface.co/neuralmagic/Llama-2-7b-pruned50-retrained) on the [ultrachat_200k](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k) dataset. +Training was performed for 2 epochs and used the [SquareHead](https://arxiv.org/abs/2310.06927) knowledge distillation with [Llama-2-7b-ultrachat](https://huggingface.co/neuralmagic/Llama-2-7b-ultrachat) as teacher. 
+ +## Help + +For further support, and discussions on these models and AI in general, join [Neural Magic's Slack Community](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-q1a1cnvo-YBoICSIw3L1dmQpjBeDurQ) \ No newline at end of file diff --git a/arc_challenge_25shot_bs16_bf16.json b/arc_challenge_25shot_bs16_bf16.json new file mode 100644 index 0000000..9b732a6 --- /dev/null +++ b/arc_challenge_25shot_bs16_bf16.json @@ -0,0 +1,25 @@ +{ + "results": { + "arc_challenge": { + "acc": 0.4667235494880546, + "acc_stderr": 0.01457899585960581, + "acc_norm": 0.48976109215017066, + "acc_norm_stderr": 0.014608326906285019 + } + }, + "versions": { + "arc_challenge": 0 + }, + "config": { + "model": "sparseml", + "model_args": "pretrained=/network/alexandre/research/cerebras/llama2_7B_sparse50_45B_retrained/ultrachat200k/llama2_7B_45B_sparse50_LR2e-4_GC2_E2/training,dtype=bfloat16", + "num_fewshot": 25, + "batch_size": "16", + "batch_sizes": [], + "device": "cuda:1", + "no_cache": true, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..2d33470 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "_name_or_path": "neuralmagic/Llama-2-7b-pruned50-retrained-ultrachat", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "tokenizer_class": "LlamaTokenizerFast", + "torch_dtype": "bfloat16", + "transformers_version": "4.40.0", + "use_cache": true, + "vocab_size": 32000 +} diff 
--git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..9188e67 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.40.0" +} diff --git a/gsm8k_5shot_bs16_bf16.json b/gsm8k_5shot_bs16_bf16.json new file mode 100644 index 0000000..770b9e7 --- /dev/null +++ b/gsm8k_5shot_bs16_bf16.json @@ -0,0 +1,23 @@ +{ + "results": { + "gsm8k": { + "acc": 0.07960576194086429, + "acc_stderr": 0.007455924338676284 + } + }, + "versions": { + "gsm8k": 0 + }, + "config": { + "model": "sparseml", + "model_args": "pretrained=/network/alexandre/research/cerebras/llama2_7B_sparse50_45B_retrained/ultrachat200k/llama2_7B_45B_sparse50_LR2e-4_GC2_E2/training,dtype=bfloat16", + "num_fewshot": 5, + "batch_size": "16", + "batch_sizes": [], + "device": "cuda:6", + "no_cache": true, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/hellaswag_10shot_bs16_bf16.json b/hellaswag_10shot_bs16_bf16.json new file mode 100644 index 0000000..9e538fc --- /dev/null +++ b/hellaswag_10shot_bs16_bf16.json @@ -0,0 +1,25 @@ +{ + "results": { + "hellaswag": { + "acc": 0.5541724756024696, + "acc_stderr": 0.004960408362133245, + "acc_norm": 0.7353116908982275, + "acc_norm_stderr": 0.00440265476726963 + } + }, + "versions": { + "hellaswag": 0 + }, + "config": { + "model": "sparseml", + "model_args": "pretrained=/network/alexandre/research/cerebras/llama2_7B_sparse50_45B_retrained/ultrachat200k/llama2_7B_45B_sparse50_LR2e-4_GC2_E2/training,dtype=bfloat16", + "num_fewshot": 10, + "batch_size": "16", + "batch_sizes": [], + "device": "cuda:6", + "no_cache": true, 
+ "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git a/mmlu_5shot_bs4_bf16.json b/mmlu_5shot_bs4_bf16.json new file mode 100644 index 0000000..7a4e93d --- /dev/null +++ b/mmlu_5shot_bs4_bf16.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4fa1ee19845996940f26ad8f606f028ff4bee1db0cba118079e0d4f242310d +size 14252 diff --git a/model-00001-of-00003.safetensors b/model-00001-of-00003.safetensors new file mode 100644 index 0000000..edc730a --- /dev/null +++ b/model-00001-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55ad804981e5548cf63e29e9921cf0961dfd13f4c155e0fccfb18188f816991 +size 4938985352 diff --git a/model-00002-of-00003.safetensors b/model-00002-of-00003.safetensors new file mode 100644 index 0000000..b176c80 --- /dev/null +++ b/model-00002-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a86e033c2ae1911684278dc4b72720dc125bf9dd18766aec6dfd0da517e70c04 +size 4947390880 diff --git a/model-00003-of-00003.safetensors b/model-00003-of-00003.safetensors new file mode 100644 index 0000000..6299ce4 --- /dev/null +++ b/model-00003-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049ac64154b2db7e46b11d45a5f2fbe7f4af694a8af96053da5d297b79ab847b +size 3590488816 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..b3d8d7d --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091fdb60b8ee3713df38fd6f9849c3aa82d7eeeb5f6d224a77139543b1db8652 +size 23950 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..451134b --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..21779d6 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd04f0eadf90287bd26e1a183ac487d8a141b09b06aecb7725bbdd343640f2e +size 1842767 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..2f17fcb --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c83dc4260b41a1c38bb2a26de2ca1f2c7a995aaeea7f1460eda7e0986064d06 +size 1365 diff --git a/truthfulqa_mc_0shot_bs16_bf16.json b/truthfulqa_mc_0shot_bs16_bf16.json new file mode 100644 index 0000000..7ed3bf3 --- /dev/null +++ b/truthfulqa_mc_0shot_bs16_bf16.json @@ -0,0 +1,25 @@ +{ + "results": { + "truthfulqa_mc": { + "mc1": 0.2631578947368421, + "mc1_stderr": 0.015415241740237012, + "mc2": 0.39511662473536746, + "mc2_stderr": 0.014932053345393062 + } + }, + "versions": { + "truthfulqa_mc": 1 + }, + "config": { + "model": "sparseml", + "model_args": "pretrained=/network/alexandre/research/cerebras/llama2_7B_sparse50_45B_retrained/ultrachat200k/llama2_7B_45B_sparse50_LR2e-4_GC2_E2/training,dtype=bfloat16", + "num_fewshot": 0, + "batch_size": "16", + "batch_sizes": [], + "device": "cuda:4", + "no_cache": true, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file diff --git 
a/winogrande_5shot_bs16_bf16.json b/winogrande_5shot_bs16_bf16.json new file mode 100644 index 0000000..c916825 --- /dev/null +++ b/winogrande_5shot_bs16_bf16.json @@ -0,0 +1,23 @@ +{ + "results": { + "winogrande": { + "acc": 0.6779794790844514, + "acc_stderr": 0.01313207020207106 + } + }, + "versions": { + "winogrande": 0 + }, + "config": { + "model": "sparseml", + "model_args": "pretrained=/network/alexandre/research/cerebras/llama2_7B_sparse50_45B_retrained/ultrachat200k/llama2_7B_45B_sparse50_LR2e-4_GC2_E2/training,dtype=bfloat16", + "num_fewshot": 5, + "batch_size": "16", + "batch_sizes": [], + "device": "cuda:2", + "no_cache": true, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} \ No newline at end of file