初始化项目,由ModelHub XC社区提供模型
Model: duyntnet/Azzurro-imatrix-GGUF Source: Original Platform
This commit is contained in:
62
README.md
Normal file
62
README.md
Normal file
@@ -0,0 +1,62 @@
|
||||
---
|
||||
license: other
|
||||
language:
|
||||
- en
|
||||
pipeline_tag: text-generation
|
||||
inference: false
|
||||
tags:
|
||||
- transformers
|
||||
- gguf
|
||||
- imatrix
|
||||
- Azzurro
|
||||
---
|
||||
Quantizations of https://huggingface.co/MoxoffSpA/Azzurro
|
||||
|
||||
|
||||
# From original readme
|
||||
|
||||
## Usage
|
||||
|
||||
Be sure to install these dependencies before running the program:
|
||||
|
||||
```python
|
||||
!pip install transformers torch sentencepiece
|
||||
```
|
||||
|
||||
```python
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
device = "cpu" # To use the GPU, make sure the CUDA toolkit is installed and change this to "cuda"
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained("MoxoffSpA/Azzurro")
|
||||
tokenizer = AutoTokenizer.from_pretrained("MoxoffSpA/Azzurro")
|
||||
|
||||
question = """Quanto è alta la torre di Pisa?"""
|
||||
context = """
|
||||
La Torre di Pisa è un campanile del XII secolo, famoso per la sua inclinazione. Alta circa 56 metri.
|
||||
"""
|
||||
|
||||
prompt = f"Domanda: {question}, contesto: {context}"
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
|
||||
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
|
||||
|
||||
model_inputs = encodeds.to(device)
|
||||
model.to(device)
|
||||
|
||||
generated_ids = model.generate(
|
||||
model_inputs, # The input to the model
|
||||
max_new_tokens=128, # Limiting the maximum number of new tokens generated
|
||||
do_sample=True, # Enabling sampling to introduce randomness in the generation
|
||||
temperature=0.1, # Setting temperature to control the randomness; lower values make the output more deterministic
|
||||
top_p=0.95, # Using nucleus sampling with top-p filtering for more coherent generation
|
||||
eos_token_id=tokenizer.eos_token_id # Specifying the token that indicates the end of a sequence
|
||||
)
|
||||
|
||||
decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
|
||||
trimmed_output = decoded_output.strip()
|
||||
print(trimmed_output)
|
||||
```
|
||||
Reference in New Issue
Block a user