Initialize project; model provided by the ModelHub XC community
Model: kaitchup/Llama-2-7b-hf-gptq-2bit (Source: Original Platform)
README.md (new file, 18 lines)
@@ -0,0 +1,18 @@
---
license: mit
---

Llama 2 7B quantized in 2-bit with GPTQ.
```
from transformers import AutoModelForCausalLM, AutoTokenizer
from optimum.gptq import GPTQQuantizer
import torch

w = 2  # target weight precision in bits
model_path = "meta-llama/Llama-2-7b-hf"

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16)

# Calibrate on the C4 dataset and quantize the weights to 2-bit with GPTQ
quantizer = GPTQQuantizer(bits=w, dataset="c4", model_seqlen=4096)
quantized_model = quantizer.quantize_model(model, tokenizer)
```
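
After quantization, the checkpoint can be written to disk and reloaded for inference. Below is a minimal sketch of that step, not part of the original recipe: the local directory name `Llama-2-7b-gptq-2bit` is a placeholder, and reloading a GPTQ checkpoint through `transformers` also requires the `auto-gptq` package.

```
# Persist the quantized weights and tokenizer (save path is a placeholder)
save_dir = "Llama-2-7b-gptq-2bit"
quantizer.save(quantized_model, save_dir)
tokenizer.save_pretrained(save_dir)

# Reload the 2-bit checkpoint and run a quick generation test
quantized_model = AutoModelForCausalLM.from_pretrained(save_dir, device_map="auto")
inputs = tokenizer("The capital of France is", return_tensors="pt").to(quantized_model.device)
outputs = quantized_model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```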