初始化项目,由ModelHub XC社区提供模型
Model: Josephgflowers/TinyLlama-Cinder-Agent-Rag Source: Original Platform
This commit is contained in:
49
.gitattributes
vendored
Normal file
49
.gitattributes
vendored
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.db* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ark* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
||||||
|
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gguf* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ggml filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.llamafile* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
|
||||||
|
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||||
61
README.md
Normal file
61
README.md
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
---
|
||||||
|
license: mit
|
||||||
|
base_model: Josephgflowers/TinyLlama-3T-Cinder-v1.2
|
||||||
|
tags:
|
||||||
|
- generated_from_trainer
|
||||||
|
model-index:
|
||||||
|
- name: TinyLlama-Cinder-Agent-Rag
|
||||||
|
results: []
|
||||||
|
---
|
||||||
|
|
||||||
|
This is first pass training. Further training and model update coming.
|
||||||
|
|
||||||
|
# TinyLlama-Cinder-Agent-Rag
|
||||||
|
Special thanks to https://nationtech.io/ for their generous sponsorship in training this model.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
This model is a fine-tuned version of [Josephgflowers/TinyLlama-3T-Cinder-v1.2](https://huggingface.co/Josephgflowers/TinyLlama-3T-Cinder-v1.2) on https://huggingface.co/datasets/Josephgflowers/agent_1.
|
||||||
|
|
||||||
|
## Model description
|
||||||
|
|
||||||
|
This model is trained for RAG, summarization, function calling, and tool usage. It is trained off of Cinder, a chatbot designed for chat about STEM topics and storytelling. More information coming.
|
||||||
|
|
||||||
|
More model versions coming soon.
|
||||||
|
|
||||||
|
See https://huggingface.co/Josephgflowers/TinyLlama-Cinder-Agent-Rag/blob/main/tinyllama_agent_cinder_txtai-rag.py
|
||||||
|
For usage example with wiki rag.
|
||||||
|
|
||||||
|
## Intended uses & limitations
|
||||||
|
|
||||||
|
RAG, Chat, Summary, and tool usage.
|
||||||
|
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
### Training hyperparameters
|
||||||
|
|
||||||
|
The following hyperparameters were used during training:
|
||||||
|
- learning_rate: 5e-05
|
||||||
|
- train_batch_size: 12
|
||||||
|
- eval_batch_size: 32
|
||||||
|
- seed: 42
|
||||||
|
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
||||||
|
- lr_scheduler_type: linear
|
||||||
|
- num_epochs: 1.0
|
||||||
|
- mixed_precision_training: Native AMP
|
||||||
|
|
||||||
|
### Training results
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Framework versions
|
||||||
|
|
||||||
|
- Transformers 4.41.0.dev0
|
||||||
|
- Pytorch 2.2.2+cu121
|
||||||
|
- Datasets 2.19.1
|
||||||
|
- Tokenizers 0.19.1
|
||||||
9
all_results.json
Normal file
9
all_results.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"epoch": 1.0,
|
||||||
|
"total_flos": 9.08850744042455e+17,
|
||||||
|
"train_loss": 0.8321872255313152,
|
||||||
|
"train_runtime": 82908.8976,
|
||||||
|
"train_samples": 71495,
|
||||||
|
"train_samples_per_second": 0.862,
|
||||||
|
"train_steps_per_second": 0.072
|
||||||
|
}
|
||||||
29
config.json
Normal file
29
config.json
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
{
|
||||||
|
"_name_or_path": "Josephgflowers/TinyLlama-3T-Cinder-v1.2",
|
||||||
|
"architectures": [
|
||||||
|
"LlamaForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 2048,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 5632,
|
||||||
|
"max_position_embeddings": 2048,
|
||||||
|
"mlp_bias": false,
|
||||||
|
"model_type": "llama",
|
||||||
|
"num_attention_heads": 32,
|
||||||
|
"num_hidden_layers": 22,
|
||||||
|
"num_key_value_heads": 4,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_scaling": null,
|
||||||
|
"rope_theta": 10000.0,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"torch_dtype": "float32",
|
||||||
|
"transformers_version": "4.41.0.dev0",
|
||||||
|
"use_cache": false,
|
||||||
|
"vocab_size": 32000
|
||||||
|
}
|
||||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
||||||
7
generation_config.json
Normal file
7
generation_config.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"max_length": 2048,
|
||||||
|
"pad_token_id": 0,
|
||||||
|
"transformers_version": "4.41.0.dev0"
|
||||||
|
}
|
||||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:0543fbabf0edae6ff52542b33203d63e0b477429f6973399eef6468b412371a4
|
||||||
|
size 4400216536
|
||||||
30
special_tokens_map.json
Normal file
30
special_tokens_map.json
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
158
tinyllama_agent_cinder_txtai-rag.py
Normal file
158
tinyllama_agent_cinder_txtai-rag.py
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
|
||||||
|
import requests
|
||||||
|
import os
|
||||||
|
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
from torchsummary import summary
|
||||||
|
from accelerate import dispatch_model, infer_auto_device_map
|
||||||
|
from txtai import Embeddings
|
||||||
|
from txtai.pipeline import LLM
|
||||||
|
#pip3 install git+https://github.com/neuml/txtai#egg=txtai[pipeline-llm]
|
||||||
|
|
||||||
|
|
||||||
|
# Wikipedia embeddings database: create an empty Embeddings instance, then
# load the "neuml/txtai-wikipedia" container through the "huggingface-hub"
# provider.
embeddings = Embeddings()
embeddings.load(
    provider="huggingface-hub",
    container="neuml/txtai-wikipedia",
)
|
||||||
|
|
||||||
|
#os.environ['OMP_NUM_THREADS'] = '6'
|
||||||
|
|
||||||
|
#
|
||||||
|
#DuckDuckGo
|
||||||
|
#
|
||||||
|
def query_duckduckgo(query):
    """Query the DuckDuckGo API for a search term and return the decoded JSON.

    Args:
        query: Search term sent as the ``q`` request parameter.

    Returns:
        The parsed JSON response, or ``None`` when the request fails, times
        out, returns an HTTP error status, or the body is not valid JSON.
    """
    url = "https://api.duckduckgo.com/"
    params = {
        'q': query,
        'format': 'json',
        'pretty': '1',
        'no_html': '1'
    }

    try:
        # Without a timeout a stalled connection would block the chat loop forever.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers an undecodable JSON body on requests versions where
        # the JSON decode error is not a RequestException subclass.
        print(f"An error occurred: {e}")
        return None
|
||||||
|
|
||||||
|
def handle_query(user_input):
    """Look up context text for ``user_input``, print it, and return it.

    DuckDuckGo is tried first. When it yields no non-empty ``AbstractText``,
    the Wikipedia embeddings database is searched instead. The text is
    returned as well as printed because the chat loop interpolates this
    function's return value into the prompt as the context.

    Args:
        user_input: The user's question / search term.

    Returns:
        The DuckDuckGo abstract, or the newline-joined ``text`` fields of the
        embeddings search results.
    """
    result = query_duckduckgo(user_input)
    if isinstance(result, dict) and result.get('AbstractText'):
        context = result['AbstractText']
        print(context)
    else:
        print("DuckDuck Go failed. Going to Wiki.")
        context = "\n".join([x["text"] for x in embeddings.search(user_input)])
        print("Restults from Wiki: \n", context)
    return context
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Model setup: checkpoint id, inference device, model and tokenizer.
model_path = "Josephgflowers/TinyLlama-Cinder-Agent-Rag"

# Inference device. Replace "cpu" with "cuda" to run on a GPU.
device = torch.device("cpu")

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    ignore_mismatched_sizes=True,
)
model = model.to(device)

# Print the module tree followed by the total parameter count.
print(model)
total_params = sum(param.numel() for param in model.parameters())
print("Total number of parameters: ", total_params)

sequence_length = 2048  # or whatever your specific sequence length is

tokenizer = AutoTokenizer.from_pretrained(model_path)

# Token id passed to generation as the end-of-sequence id; 2 is '</s>'.
stop_token = 2
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def chat_with_model(prompt_text, stop_token, model, tokenizer):
    """Generate the model's reply to ``prompt_text``.

    Args:
        prompt_text: Full prompt text, including any chat markup.
        stop_token: Token id passed to ``generate`` as ``eos_token_id``.
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer exposing ``encode`` and ``decode``.

    Returns:
        The decoded text of the newly generated tokens, with surrounding
        whitespace stripped.
    """
    # Encode the prompt and place it on the same device as the model, instead
    # of relying on a module-level device variable.
    encoded_prompt = tokenizer.encode(
        prompt_text, add_special_tokens=False, return_tensors="pt"
    ).to(model.device)

    # Generate response
    output_sequences = model.generate(
        input_ids=encoded_prompt,
        max_new_tokens=256,
        temperature=0.1,
        repetition_penalty=1.2,
        top_k=20,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1,
        eos_token_id=stop_token,
    )

    # The returned sequence begins with the prompt tokens. Drop them by token
    # count rather than by character count: decoding does not reproduce the
    # prompt string exactly, so slicing the decoded text by len(prompt_text)
    # can cut into the reply or leave prompt fragments in it.
    prompt_length = encoded_prompt.shape[-1]
    generated_tokens = output_sequences[0][prompt_length:].tolist()
    text = tokenizer.decode(generated_tokens, clean_up_tokenization_spaces=True)
    return text.strip()
|
||||||
|
|
||||||
|
# Initialize conversation history
conversation_history = ''

# Input mode and the speaker label shown at the input prompt.
input_mode = 'text'
character_name = '<|user|>'

# Chat loop: runs for at most 20 turns.
num_chat = 1
while num_chat <= 20:
    question = input(f"{character_name}: ")
    user_input = question  # Get text input from user
    # Retrieved text that is interpolated into the prompt below.
    context = handle_query(user_input)
    prompt_text = f"""
<s>
<|system|>
You will be given documentation as context to answer a users question. You are an expert at summarization. Pay close attention to the key concepts. Use only information from the Context in your answer.
</s>
<|data|>
Context:
{context}
-Use only the above context to answer the question.
</s>
<|user|>
Here is information on "{question}". Extract only the above information into topic, category, keywords, and summary formatted in JSON. Think through the most critical information to provide then respond with the JSON object of topic, category, keywords, and summary.
</s>
<|assistant|>

"""
    # Alternative user instruction:
    # Use only the documentation provided to answer this question: {question}

    response_text = chat_with_model(prompt_text, stop_token, model, tokenizer)
    response_text = response_text.replace('<s>', '')

    # Keep only the assistant's first message. Split on the bare '</s>'
    # marker: generation is given stop_token as its end-of-sequence id, so no
    # newline follows the marker and splitting on '</s>\n' would leave it in
    # the reply (and duplicate it in the history below).
    response_text = response_text.split('</s>', 1)[0]

    print(f"\n______________________________________________\n\nAssistant: {response_text}")

    # Update conversation history; once it grows past 2048 characters, drop
    # the oldest 1024 characters.
    conversation_history += f"{prompt_text}{response_text}</s>\n"
    if len(conversation_history) > 2048:
        conversation_history = conversation_history[1024:]

    num_chat += 1
|
||||||
|
|
||||||
|
|
||||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:bf467c9e0f536bda271283c6ef85eb1a943e3196b621c8a912d64953b205df83
|
||||||
|
size 1842795
|
||||||
41
tokenizer_config.json
Normal file
41
tokenizer_config.json
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
{
|
||||||
|
"add_bos_token": true,
|
||||||
|
"add_eos_token": false,
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"model_max_length": 2048,
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"padding_side": "right",
|
||||||
|
"sp_model_kwargs": {},
|
||||||
|
"tokenizer_class": "LlamaTokenizer",
|
||||||
|
"unk_token": "<unk>",
|
||||||
|
"use_default_system_prompt": false
|
||||||
|
}
|
||||||
9
train_results.json
Normal file
9
train_results.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"epoch": 1.0,
|
||||||
|
"total_flos": 9.08850744042455e+17,
|
||||||
|
"train_loss": 0.8321872255313152,
|
||||||
|
"train_runtime": 82908.8976,
|
||||||
|
"train_samples": 71495,
|
||||||
|
"train_samples_per_second": 0.862,
|
||||||
|
"train_steps_per_second": 0.072
|
||||||
|
}
|
||||||
455
trainer_state.json
Normal file
455
trainer_state.json
Normal file
@@ -0,0 +1,455 @@
|
|||||||
|
{
|
||||||
|
"best_metric": null,
|
||||||
|
"best_model_checkpoint": null,
|
||||||
|
"epoch": 1.0,
|
||||||
|
"eval_steps": 500,
|
||||||
|
"global_step": 5958,
|
||||||
|
"is_hyper_param_search": false,
|
||||||
|
"is_local_process_zero": true,
|
||||||
|
"is_world_process_zero": true,
|
||||||
|
"log_history": [
|
||||||
|
{
|
||||||
|
"epoch": 0.016784155756965426,
|
||||||
|
"grad_norm": 83914.40625,
|
||||||
|
"learning_rate": 4.916079221215173e-05,
|
||||||
|
"loss": 0.9807,
|
||||||
|
"step": 100
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.03356831151393085,
|
||||||
|
"grad_norm": 97513.53125,
|
||||||
|
"learning_rate": 4.832158442430346e-05,
|
||||||
|
"loss": 0.9534,
|
||||||
|
"step": 200
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.050352467270896276,
|
||||||
|
"grad_norm": 80779.1171875,
|
||||||
|
"learning_rate": 4.748237663645519e-05,
|
||||||
|
"loss": 0.9194,
|
||||||
|
"step": 300
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.0671366230278617,
|
||||||
|
"grad_norm": 71209.2421875,
|
||||||
|
"learning_rate": 4.664316884860692e-05,
|
||||||
|
"loss": 0.9223,
|
||||||
|
"step": 400
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.08392077878482712,
|
||||||
|
"grad_norm": 75543.140625,
|
||||||
|
"learning_rate": 4.5803961060758646e-05,
|
||||||
|
"loss": 0.9167,
|
||||||
|
"step": 500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.10070493454179255,
|
||||||
|
"grad_norm": 81713.671875,
|
||||||
|
"learning_rate": 4.4964753272910375e-05,
|
||||||
|
"loss": 0.8878,
|
||||||
|
"step": 600
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.11748909029875797,
|
||||||
|
"grad_norm": 79699.15625,
|
||||||
|
"learning_rate": 4.4125545485062104e-05,
|
||||||
|
"loss": 0.8801,
|
||||||
|
"step": 700
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.1342732460557234,
|
||||||
|
"grad_norm": 57691.8359375,
|
||||||
|
"learning_rate": 4.328633769721383e-05,
|
||||||
|
"loss": 0.9157,
|
||||||
|
"step": 800
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.1510574018126888,
|
||||||
|
"grad_norm": 78234.8359375,
|
||||||
|
"learning_rate": 4.244712990936556e-05,
|
||||||
|
"loss": 0.8952,
|
||||||
|
"step": 900
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.16784155756965424,
|
||||||
|
"grad_norm": 64203.4140625,
|
||||||
|
"learning_rate": 4.160792212151729e-05,
|
||||||
|
"loss": 0.9049,
|
||||||
|
"step": 1000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.18462571332661967,
|
||||||
|
"grad_norm": 63400.09375,
|
||||||
|
"learning_rate": 4.076871433366902e-05,
|
||||||
|
"loss": 0.8827,
|
||||||
|
"step": 1100
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.2014098690835851,
|
||||||
|
"grad_norm": 71029.0078125,
|
||||||
|
"learning_rate": 3.992950654582075e-05,
|
||||||
|
"loss": 0.9008,
|
||||||
|
"step": 1200
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.21819402484055053,
|
||||||
|
"grad_norm": 67397.421875,
|
||||||
|
"learning_rate": 3.9090298757972476e-05,
|
||||||
|
"loss": 0.889,
|
||||||
|
"step": 1300
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.23497818059751593,
|
||||||
|
"grad_norm": 63474.31640625,
|
||||||
|
"learning_rate": 3.8251090970124205e-05,
|
||||||
|
"loss": 0.8945,
|
||||||
|
"step": 1400
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.25176233635448136,
|
||||||
|
"grad_norm": 61201.125,
|
||||||
|
"learning_rate": 3.7411883182275934e-05,
|
||||||
|
"loss": 0.8427,
|
||||||
|
"step": 1500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.2685464921114468,
|
||||||
|
"grad_norm": 77147.5390625,
|
||||||
|
"learning_rate": 3.657267539442766e-05,
|
||||||
|
"loss": 0.8659,
|
||||||
|
"step": 1600
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.2853306478684122,
|
||||||
|
"grad_norm": 135552.828125,
|
||||||
|
"learning_rate": 3.573346760657939e-05,
|
||||||
|
"loss": 0.8756,
|
||||||
|
"step": 1700
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.3021148036253776,
|
||||||
|
"grad_norm": 62637.78515625,
|
||||||
|
"learning_rate": 3.489425981873112e-05,
|
||||||
|
"loss": 0.8552,
|
||||||
|
"step": 1800
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.3188989593823431,
|
||||||
|
"grad_norm": 86379.7578125,
|
||||||
|
"learning_rate": 3.405505203088285e-05,
|
||||||
|
"loss": 0.8667,
|
||||||
|
"step": 1900
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.3356831151393085,
|
||||||
|
"grad_norm": 70539.9921875,
|
||||||
|
"learning_rate": 3.321584424303458e-05,
|
||||||
|
"loss": 0.8466,
|
||||||
|
"step": 2000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.35246727089627394,
|
||||||
|
"grad_norm": 59345.5234375,
|
||||||
|
"learning_rate": 3.2376636455186307e-05,
|
||||||
|
"loss": 0.8694,
|
||||||
|
"step": 2100
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.36925142665323935,
|
||||||
|
"grad_norm": 72302.8828125,
|
||||||
|
"learning_rate": 3.1537428667338035e-05,
|
||||||
|
"loss": 0.8417,
|
||||||
|
"step": 2200
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.38603558241020475,
|
||||||
|
"grad_norm": 65996.3828125,
|
||||||
|
"learning_rate": 3.0698220879489764e-05,
|
||||||
|
"loss": 0.8473,
|
||||||
|
"step": 2300
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.4028197381671702,
|
||||||
|
"grad_norm": 84450.78125,
|
||||||
|
"learning_rate": 2.9859013091641493e-05,
|
||||||
|
"loss": 0.8247,
|
||||||
|
"step": 2400
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.4196038939241356,
|
||||||
|
"grad_norm": 69873.5390625,
|
||||||
|
"learning_rate": 2.9019805303793218e-05,
|
||||||
|
"loss": 0.8501,
|
||||||
|
"step": 2500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.43638804968110106,
|
||||||
|
"grad_norm": 69021.078125,
|
||||||
|
"learning_rate": 2.818059751594495e-05,
|
||||||
|
"loss": 0.8284,
|
||||||
|
"step": 2600
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.45317220543806647,
|
||||||
|
"grad_norm": 75120.1171875,
|
||||||
|
"learning_rate": 2.734138972809668e-05,
|
||||||
|
"loss": 0.847,
|
||||||
|
"step": 2700
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.46995636119503187,
|
||||||
|
"grad_norm": 63536.984375,
|
||||||
|
"learning_rate": 2.6502181940248405e-05,
|
||||||
|
"loss": 0.8222,
|
||||||
|
"step": 2800
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.4867405169519973,
|
||||||
|
"grad_norm": 66156.6796875,
|
||||||
|
"learning_rate": 2.5662974152400137e-05,
|
||||||
|
"loss": 0.8227,
|
||||||
|
"step": 2900
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.5035246727089627,
|
||||||
|
"grad_norm": 69988.140625,
|
||||||
|
"learning_rate": 2.4823766364551865e-05,
|
||||||
|
"loss": 0.8048,
|
||||||
|
"step": 3000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.5203088284659282,
|
||||||
|
"grad_norm": 71663.9921875,
|
||||||
|
"learning_rate": 2.398455857670359e-05,
|
||||||
|
"loss": 0.8351,
|
||||||
|
"step": 3100
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.5370929842228936,
|
||||||
|
"grad_norm": 64774.16796875,
|
||||||
|
"learning_rate": 2.3145350788855323e-05,
|
||||||
|
"loss": 0.8121,
|
||||||
|
"step": 3200
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.553877139979859,
|
||||||
|
"grad_norm": 63534.6015625,
|
||||||
|
"learning_rate": 2.2306143001007052e-05,
|
||||||
|
"loss": 0.824,
|
||||||
|
"step": 3300
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.5706612957368244,
|
||||||
|
"grad_norm": 73855.3984375,
|
||||||
|
"learning_rate": 2.1466935213158777e-05,
|
||||||
|
"loss": 0.817,
|
||||||
|
"step": 3400
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.5874454514937899,
|
||||||
|
"grad_norm": 68886.3984375,
|
||||||
|
"learning_rate": 2.062772742531051e-05,
|
||||||
|
"loss": 0.8078,
|
||||||
|
"step": 3500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.6042296072507553,
|
||||||
|
"grad_norm": 68186.78125,
|
||||||
|
"learning_rate": 1.9788519637462235e-05,
|
||||||
|
"loss": 0.8175,
|
||||||
|
"step": 3600
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.6210137630077207,
|
||||||
|
"grad_norm": 74676.046875,
|
||||||
|
"learning_rate": 1.8949311849613967e-05,
|
||||||
|
"loss": 0.7929,
|
||||||
|
"step": 3700
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.6377979187646862,
|
||||||
|
"grad_norm": 68078.203125,
|
||||||
|
"learning_rate": 1.8110104061765696e-05,
|
||||||
|
"loss": 0.7941,
|
||||||
|
"step": 3800
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.6545820745216515,
|
||||||
|
"grad_norm": 66256.53125,
|
||||||
|
"learning_rate": 1.727089627391742e-05,
|
||||||
|
"loss": 0.8264,
|
||||||
|
"step": 3900
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.671366230278617,
|
||||||
|
"grad_norm": 71649.8125,
|
||||||
|
"learning_rate": 1.6431688486069153e-05,
|
||||||
|
"loss": 0.7731,
|
||||||
|
"step": 4000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.6881503860355824,
|
||||||
|
"grad_norm": 70382.28125,
|
||||||
|
"learning_rate": 1.559248069822088e-05,
|
||||||
|
"loss": 0.8076,
|
||||||
|
"step": 4100
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.7049345417925479,
|
||||||
|
"grad_norm": 70303.125,
|
||||||
|
"learning_rate": 1.4753272910372609e-05,
|
||||||
|
"loss": 0.7841,
|
||||||
|
"step": 4200
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.7217186975495132,
|
||||||
|
"grad_norm": 62633.87109375,
|
||||||
|
"learning_rate": 1.391406512252434e-05,
|
||||||
|
"loss": 0.7781,
|
||||||
|
"step": 4300
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.7385028533064787,
|
||||||
|
"grad_norm": 69537.5625,
|
||||||
|
"learning_rate": 1.3074857334676067e-05,
|
||||||
|
"loss": 0.7846,
|
||||||
|
"step": 4400
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.7552870090634441,
|
||||||
|
"grad_norm": 65905.9765625,
|
||||||
|
"learning_rate": 1.2235649546827795e-05,
|
||||||
|
"loss": 0.7818,
|
||||||
|
"step": 4500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.7720711648204095,
|
||||||
|
"grad_norm": 66610.0703125,
|
||||||
|
"learning_rate": 1.1396441758979524e-05,
|
||||||
|
"loss": 0.774,
|
||||||
|
"step": 4600
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.788855320577375,
|
||||||
|
"grad_norm": 73451.1953125,
|
||||||
|
"learning_rate": 1.0557233971131253e-05,
|
||||||
|
"loss": 0.7768,
|
||||||
|
"step": 4700
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.8056394763343404,
|
||||||
|
"grad_norm": 85429.0390625,
|
||||||
|
"learning_rate": 9.718026183282982e-06,
|
||||||
|
"loss": 0.7805,
|
||||||
|
"step": 4800
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.8224236320913058,
|
||||||
|
"grad_norm": 63732.640625,
|
||||||
|
"learning_rate": 8.87881839543471e-06,
|
||||||
|
"loss": 0.7685,
|
||||||
|
"step": 4900
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.8392077878482712,
|
||||||
|
"grad_norm": 65406.63671875,
|
||||||
|
"learning_rate": 8.039610607586439e-06,
|
||||||
|
"loss": 0.7504,
|
||||||
|
"step": 5000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.8559919436052367,
|
||||||
|
"grad_norm": 74580.1484375,
|
||||||
|
"learning_rate": 7.200402819738168e-06,
|
||||||
|
"loss": 0.7814,
|
||||||
|
"step": 5100
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.8727760993622021,
|
||||||
|
"grad_norm": 73900.3671875,
|
||||||
|
"learning_rate": 6.361195031889897e-06,
|
||||||
|
"loss": 0.7749,
|
||||||
|
"step": 5200
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.8895602551191675,
|
||||||
|
"grad_norm": 77174.4375,
|
||||||
|
"learning_rate": 5.5219872440416254e-06,
|
||||||
|
"loss": 0.7862,
|
||||||
|
"step": 5300
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.9063444108761329,
|
||||||
|
"grad_norm": 61536.1875,
|
||||||
|
"learning_rate": 4.682779456193353e-06,
|
||||||
|
"loss": 0.7669,
|
||||||
|
"step": 5400
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.9231285666330984,
|
||||||
|
"grad_norm": 60535.73828125,
|
||||||
|
"learning_rate": 3.843571668345083e-06,
|
||||||
|
"loss": 0.7767,
|
||||||
|
"step": 5500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.9399127223900637,
|
||||||
|
"grad_norm": 57083.25,
|
||||||
|
"learning_rate": 3.0043638804968113e-06,
|
||||||
|
"loss": 0.7772,
|
||||||
|
"step": 5600
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.9566968781470292,
|
||||||
|
"grad_norm": 65630.703125,
|
||||||
|
"learning_rate": 2.16515609264854e-06,
|
||||||
|
"loss": 0.7749,
|
||||||
|
"step": 5700
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.9734810339039947,
|
||||||
|
"grad_norm": 66215.3359375,
|
||||||
|
"learning_rate": 1.3259483048002687e-06,
|
||||||
|
"loss": 0.782,
|
||||||
|
"step": 5800
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.99026518966096,
|
||||||
|
"grad_norm": 72915.515625,
|
||||||
|
"learning_rate": 4.867405169519974e-07,
|
||||||
|
"loss": 0.7722,
|
||||||
|
"step": 5900
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0,
|
||||||
|
"step": 5958,
|
||||||
|
"total_flos": 9.08850744042455e+17,
|
||||||
|
"train_loss": 0.8321872255313152,
|
||||||
|
"train_runtime": 82908.8976,
|
||||||
|
"train_samples_per_second": 0.862,
|
||||||
|
"train_steps_per_second": 0.072
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"logging_steps": 100,
|
||||||
|
"max_steps": 5958,
|
||||||
|
"num_input_tokens_seen": 0,
|
||||||
|
"num_train_epochs": 1,
|
||||||
|
"save_steps": 5958,
|
||||||
|
"stateful_callbacks": {
|
||||||
|
"TrainerControl": {
|
||||||
|
"args": {
|
||||||
|
"should_epoch_stop": false,
|
||||||
|
"should_evaluate": false,
|
||||||
|
"should_log": false,
|
||||||
|
"should_save": true,
|
||||||
|
"should_training_stop": true
|
||||||
|
},
|
||||||
|
"attributes": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"total_flos": 9.08850744042455e+17,
|
||||||
|
"train_batch_size": 12,
|
||||||
|
"trial_name": null,
|
||||||
|
"trial_params": null
|
||||||
|
}
|
||||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:dbd6ecac03d543acbcd036abe674eba4f3d9c6b10013486b12a0549c7e08434e
|
||||||
|
size 5112
|
||||||
Reference in New Issue
Block a user