commit 8eb589696dcdff1a86df15ae8c08a81e1dc5a8f6 Author: ModelHub XC Date: Wed May 6 22:36:51 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: RedHatAI/TinyLlama-1.1B-Chat-v1.0-pruned2.4 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..8b831fc --- /dev/null +++ 
b/README.md @@ -0,0 +1,96 @@ +--- +base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 +inference: true +model_type: Llama +hardware_tag: +- Intel Xeon +license: apache-2.0 +license_name: apache-2.0 +license_link: >- + https://choosealicense.com/licenses/apache-2.0/ +name: RedHatAI/TinyLlama-1.1B-Chat-v1.0-pruned2.4 +description: >- + Pruned TinyLlama model. +readme: >- + https://huggingface.co/RedHatAI/TinyLlama-1.1B-Chat-v1.0-pruned2.4/blob/main/README.md +tags: +- nm-vllm +- sparse +--- +## TinyLlama-1.1B-Chat-v1.0-pruned2.4 +This repo contains model files for [TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) optimized for [NM-vLLM](https://github.com/neuralmagic/nm-vllm), a high-throughput serving engine for compressed LLMs. + +This model was pruned with [SparseGPT](https://arxiv.org/abs/2301.00774), using [SparseML](https://github.com/neuralmagic/sparseml). + +## Inference +Install [NM-vLLM](https://github.com/neuralmagic/nm-vllm) for fast inference and low memory usage: +```bash +pip install nm-vllm[sparse] +``` +Run in a Python pipeline for local inference: +```python +from vllm import LLM, SamplingParams + +model = LLM("nm-testing/TinyLlama-1.1B-Chat-v1.0-pruned2.4", sparsity="semi_structured_sparse_w16a16") +prompt = "How to make banana bread?" +formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n" + +sampling_params = SamplingParams(max_tokens=100,temperature=0,repetition_penalty=1.3) +outputs = model.generate(formatted_prompt, sampling_params=sampling_params) +print(outputs[0].outputs[0].text) +""" +Banana bread is a delicious dessert that is made with bananas. Here is how to make banana bread: + +1. Firstly, you need to cut bananas into small pieces. +2. 
Then, you need to slice the bananas into small pieces +""" +``` + +## Prompt template + +``` +<|im_start|>user +{prompt}<|im_end|> +<|im_start|>assistant + +``` + +## Sparsification +For details on how this model was sparsified, see the `recipe.yaml` in this repo and follow the instructions below. + +Install [SparseML](https://github.com/neuralmagic/sparseml): +```bash +git clone https://github.com/neuralmagic/sparseml +pip install -e "sparseml[transformers]" +``` + +Replace the recipe as you like and run this one-shot compression script to apply SparseGPT: +```python +import sparseml.transformers + +original_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" +calibration_dataset = "open_platypus" +output_directory = "output/" + +recipe = """ +test_stage: + obcq_modifiers: + SparseGPTModifier: + sparsity: 0.5 + sequential_update: true + mask_structure: '2:4' + targets: ['re:model.layers.\d*$'] +""" + +# Apply SparseGPT to the model +sparseml.transformers.oneshot( + model=original_model_name, + dataset=calibration_dataset, + recipe=recipe, + output_dir=output_directory, +) +``` + +## Slack + +For further support and discussions on these models and AI in general, join [Neural Magic's Slack Community](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-q1a1cnvo-YBoICSIw3L1dmQpjBeDurQ) \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..d2596d0 --- /dev/null +++ b/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 5632, + "max_position_embeddings": 2048, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 22, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + 
"tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "1.7.0.20240130", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..0790eb4 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "bos_token_id": 1, + "eos_token_id": 2, + "max_length": 2048, + "pad_token_id": 0, + "transformers_version": "1.7.0.20240130" +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..13c6d94 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:713193f5316dfd143409c67532d10d8674100aa1082229273ddb2574525306e4 +size 2200164718 diff --git a/recipe.yaml b/recipe.yaml new file mode 100644 index 0000000..9328bf4 --- /dev/null +++ b/recipe.yaml @@ -0,0 +1,10 @@ +test_stage: + obcq_modifiers: + SparseGPTModifier: + sparsity: 0.5 + block_size: 128 + sequential_update: false + quantize: false + percdamp: 0.01 + mask_structure: '2:4' + targets: ['re:model.layers.\d*$'] diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..492d4b2 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff 
--git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..af7720c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b16520bbfede7dc208605369b0e6e08bfdaecbbc0f9ec709eccecbcef37f5b3 +size 1843031 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..a249d4b --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b41ba7d0eb91e77914ca3dafde559ea3e19878769b7e68409e89bed5222e77a +size 1289