From 4ef1615f3fb8c388592a408d6794c9bfff3d2679 Mon Sep 17 00:00:00 2001 From: "first_name.last_name" Date: Thu, 25 Apr 2024 22:02:20 +0000 Subject: [PATCH] add default model card --- README.md | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 10fc778..90e37b0 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,69 @@ --- +library_name: transformers +tags: +- 4-bit +- AWQ +- text-generation +- autotrain_compatible +- endpoints_compatible +pipeline_tag: text-generation inference: false +quantized_by: Suparious --- # Orenguteng/Lexi-Llama-3-8B-Uncensored AWQ -** PROCESSING .... ETA 30mins ** - - Model creator: [Orenguteng](https://huggingface.co/Orenguteng) - Original model: [Lexi-Llama-3-8B-Uncensored](https://huggingface.co/Orenguteng/Lexi-Llama-3-8B-Uncensored) + + +## How to use + +### Install the necessary packages + +```bash +pip install --upgrade autoawq autoawq-kernels +``` + +### Example Python code + +```python +from awq import AutoAWQForCausalLM +from transformers import AutoTokenizer, TextStreamer + +model_path = "solidrust/Lexi-Llama-3-8B-Uncensored-AWQ" +system_message = "You are Lexi-Llama-3-8B-Uncensored, incarnated as a powerful AI. You were created by Orenguteng." + +# Load model +model = AutoAWQForCausalLM.from_quantized(model_path, + fuse_layers=True) +tokenizer = AutoTokenizer.from_pretrained(model_path, + trust_remote_code=True) +streamer = TextStreamer(tokenizer, + skip_prompt=True, + skip_special_tokens=True) + +# Convert prompt to tokens +prompt_template = """\ +<|im_start|>system +{system_message}<|im_end|> +<|im_start|>user +{prompt}<|im_end|> +<|im_start|>assistant""" + +prompt = "You're standing on the surface of the Earth. "\ + "You walk one mile south, one mile west and one mile north. "\ + "You end up exactly where you started. Where are you?" 
+ +tokens = tokenizer(prompt_template.format(system_message=system_message, prompt=prompt), + return_tensors='pt').input_ids.cuda() + +# Generate output +generation_output = model.generate(tokens, + streamer=streamer, + max_new_tokens=512) +``` + ### About AWQ AWQ is an efficient, accurate and blazing-fast low-bit weight quantization method, currently supporting 4-bit quantization. Compared to GPTQ, it offers faster Transformers-based inference with equivalent or better quality compared to the most commonly used GPTQ settings.