初始化项目,由ModelHub XC社区提供模型

Model: Josephgflowers/TinyLlama-Cinder-Agent-Rag
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-17 12:31:07 +08:00
commit 8a5219cbb6
14 changed files with 858 additions and 0 deletions

49
.gitattributes vendored Normal file
View File

@@ -0,0 +1,49 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bin.* filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*.tfevents* filter=lfs diff=lfs merge=lfs -text
*.db* filter=lfs diff=lfs merge=lfs -text
*.ark* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.gguf* filter=lfs diff=lfs merge=lfs -text
*.ggml filter=lfs diff=lfs merge=lfs -text
*.llamafile* filter=lfs diff=lfs merge=lfs -text
*.pt2 filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

61
README.md Normal file
View File

@@ -0,0 +1,61 @@
---
license: mit
base_model: Josephgflowers/TinyLlama-3T-Cinder-v1.2
tags:
- generated_from_trainer
model-index:
- name: TinyLlama-Cinder-Agent-Rag
results: []
---
This is a first-pass training run. Further training and model updates are coming.
# TinyLlama-Cinder-Agent-Rag
Special thanks to https://nationtech.io/ for their generous sponsorship in training this model.
![image/png](https://cdn-uploads.huggingface.co/production/uploads/6328952f798f8d122ce62a44/MbN_SXChmMxuHO8GjdUSc.png)
This model is a fine-tuned version of [Josephgflowers/TinyLlama-3T-Cinder-v1.2](https://huggingface.co/Josephgflowers/TinyLlama-3T-Cinder-v1.2) on https://huggingface.co/datasets/Josephgflowers/agent_1.
## Model description
This model is trained for RAG, Summary, Function Calling and Tool usage. It is trained off of Cinder, a chatbot designed for chat about STEM topics and storytelling. More information coming.
More model versions coming soon.
See https://huggingface.co/Josephgflowers/TinyLlama-Cinder-Agent-Rag/blob/main/tinyllama_agent_cinder_txtai-rag.py for a usage example with wiki RAG.
## Intended uses & limitations
RAG, Chat, Summary, and tool usage.
![image/png](https://cdn-uploads.huggingface.co/production/uploads/6328952f798f8d122ce62a44/iKqIkk14iwrd50oPrKOFc.png)
![image/png](https://cdn-uploads.huggingface.co/production/uploads/6328952f798f8d122ce62a44/ijVXD83CGR0JG_sFZZXi6.png)
### Training hyperparameters
The following hyperparameters were used during training:
- learning_rate: 5e-05
- train_batch_size: 12
- eval_batch_size: 32
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- num_epochs: 1.0
- mixed_precision_training: Native AMP
### Training results
### Framework versions
- Transformers 4.41.0.dev0
- Pytorch 2.2.2+cu121
- Datasets 2.19.1
- Tokenizers 0.19.1

9
all_results.json Normal file
View File

@@ -0,0 +1,9 @@
{
"epoch": 1.0,
"total_flos": 9.08850744042455e+17,
"train_loss": 0.8321872255313152,
"train_runtime": 82908.8976,
"train_samples": 71495,
"train_samples_per_second": 0.862,
"train_steps_per_second": 0.072
}

29
config.json Normal file
View File

@@ -0,0 +1,29 @@
{
"_name_or_path": "Josephgflowers/TinyLlama-3T-Cinder-v1.2",
"architectures": [
"LlamaForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 5632,
"max_position_embeddings": 2048,
"mlp_bias": false,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 22,
"num_key_value_heads": 4,
"pretraining_tp": 1,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.41.0.dev0",
"use_cache": false,
"vocab_size": 32000
}

1
configuration.json Normal file
View File

@@ -0,0 +1 @@
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}

7
generation_config.json Normal file
View File

@@ -0,0 +1,7 @@
{
"bos_token_id": 1,
"eos_token_id": 2,
"max_length": 2048,
"pad_token_id": 0,
"transformers_version": "4.41.0.dev0"
}

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0543fbabf0edae6ff52542b33203d63e0b477429f6973399eef6468b412371a4
size 4400216536

30
special_tokens_map.json Normal file
View File

@@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

View File

@@ -0,0 +1,158 @@
import requests
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import torch.nn as nn
from torchsummary import summary
from accelerate import dispatch_model, infer_auto_device_map
from txtai import Embeddings
from txtai.pipeline import LLM
#pip3 install git+https://github.com/neuml/txtai#egg=txtai[pipeline-llm]
# Wikipedia embeddings database: create a txtai Embeddings instance and load the
# "neuml/txtai-wikipedia" index from the Hugging Face Hub. handle_query() searches
# this index when DuckDuckGo yields no abstract text.
embeddings = Embeddings()
embeddings.load(provider="huggingface-hub", container="neuml/txtai-wikipedia")
# Disabled: would set the OMP_NUM_THREADS environment variable to 6.
#os.environ['OMP_NUM_THREADS'] = '6'
#
#DuckDuckGo
#
def query_duckduckgo(query, timeout=10):
    """Query the DuckDuckGo API for a search term and return the parsed JSON.

    Args:
        query: Search term sent as the ``q`` parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever, which would freeze the
            interactive chat loop.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, times
        out, returns an HTTP error status, or the body is not valid JSON.
    """
    url = "https://api.duckduckgo.com/"
    params = {
        'q': query,
        'format': 'json',
        'pretty': '1',
        'no_html': '1'
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions where
        # the decode error is not a RequestException subclass.
        print(f"An error occurred: {e}")
        return None
def handle_query(user_input):
    """Retrieve context text for a user question and return it.

    DuckDuckGo is tried first; when it yields no non-empty ``AbstractText``,
    the Wikipedia embeddings index is searched instead and the matching
    passages are joined with newlines. The retrieved text is printed (as
    before) and also returned, so that callers assigning the result receive
    the context rather than ``None``.

    Args:
        user_input: The question or search term entered by the user.

    Returns:
        The retrieved context as a string.
    """
    result = query_duckduckgo(user_input)
    if result and 'AbstractText' in result and result['AbstractText']:
        context = result['AbstractText']
        print(context)
    else:
        print("DuckDuck Go failed. Going to Wiki.")
        context = "\n".join([x["text"] for x in embeddings.search(user_input)])
        print("Restults from Wiki: \n", context)
    return context
# Load model and tokenizer from the Hugging Face Hub repository named below.
model_path = "Josephgflowers/TinyLlama-Cinder-Agent-Rag"
# Define the device (CPU or GPU). CPU is selected; swap in the commented-out
# line to run on CUDA instead.
#device = torch.device("cuda")
device = torch.device("cpu")
# ignore_mismatched_sizes=True is passed to from_pretrained; the loaded model is
# then moved to the selected device.
model = AutoModelForCausalLM.from_pretrained(model_path,ignore_mismatched_sizes=True).to(device)
# Print the module structure and the total parameter count for inspection.
print(model)
total_params = sum(p.numel() for p in model.parameters())
print("Total number of parameters: ", total_params)
# NOTE(review): sequence_length is not referenced by any code visible in this file.
sequence_length = 2048 # or whatever your specific sequence length is
#embedding_size = 2048 # as per your model's definition
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Token id passed to generate() as eos_token_id. The other ids in the original
# note (3556, 128247) look like previously tried values - TODO confirm.
stop_token =2 #3556 </ #2 #128247
# '</s>' has id 2
def chat_with_model(prompt_text, stop_token, model, tokenizer):
    """Generate the model's continuation of ``prompt_text``.

    Args:
        prompt_text: Full prompt, including any chat markup; it is encoded
            without adding extra special tokens.
        stop_token: Token id passed to ``generate`` as ``eos_token_id``.
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer exposing ``encode`` and ``decode``.

    Returns:
        The newly generated text with surrounding whitespace stripped.
        Special tokens such as the end-of-sequence marker are left in the
        text for the caller to handle.
    """
    # Encode the prompt and place it on the same device as the model.
    encoded_prompt = tokenizer.encode(
        prompt_text, add_special_tokens=False, return_tensors="pt"
    ).to(model.device)

    # Generate response
    output_sequences = model.generate(
        input_ids=encoded_prompt,
        max_new_tokens=256,
        temperature=0.1,
        repetition_penalty=1.2,
        top_k=20,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1,
        eos_token_id=stop_token,
    )

    # The output holds the prompt tokens followed by the new tokens. Drop the
    # prompt by token count rather than by character count: decoding with
    # clean-up does not reproduce the prompt string exactly, so slicing the
    # decoded text by len(prompt_text) can cut into the reply.
    prompt_length = encoded_prompt.shape[-1]
    new_tokens = output_sequences[0][prompt_length:].tolist()
    response_text = tokenizer.decode(new_tokens, clean_up_tokenization_spaces=True)
    return response_text.strip()
# Conversation transcript. It is accumulated and trimmed at the end of each
# turn but is not fed back into the prompt; every turn is answered from
# freshly retrieved context only.
conversation_history = ''
# Input mode (not read by the code below) and the label shown at the input prompt.
input_mode = 'text'
character_name = '<|user|>'

# Chat loop: runs for at most 20 turns.
num_chat = 1
while num_chat <= 20:
    question = input(f"{character_name}: ")
    user_input = question  # Get text input from user
    # Retrieve supporting text for the question; whatever handle_query returns
    # is interpolated into the prompt as the Context section.
    context = handle_query(user_input)
    prompt_text = f"""
<s>
<|system|>
You will be given documentation as context to answer a users question. You are an expert at summarization. Pay close attention to the key concepts. Use only information from the Context in your answer.
</s>
<|data|>
Context:
{context}
-Use only the above context to answer the question.
</s>
<|user|>
Here is information on "{question}". Extract only the above information into topic, category, keywords, and summary formatted in JSON. Think through the most critical information to provide then respond with the JSON object of topic, category, keywords, and summary.
</s>
<|assistant|>
"""
    # Alternative user instructions kept for reference:
    #   topic, category, keywords, and summary formatted in JSON. Think through the most critical information to provide then respond with the JSON object of topic, category, keywords, and summary
    #   Use only the documentation provided to answer this question: {question}
    response_text = chat_with_model(prompt_text, stop_token, model, tokenizer)
    response_text = response_text.replace('<s>', '')
    # Keep only the first assistant message. Split on the bare end-of-sequence
    # marker: the reply is already stripped, so a final '</s>' has no trailing
    # newline and splitting on '</s>\n' would leave the marker in the output.
    response_text = response_text.split('</s>', 1)[0].strip()
    print(f"\n______________________________________________\n\nAssistant: {response_text}")
    # Update conversation history, dropping the oldest 1024 characters once
    # the transcript grows beyond 2048 characters.
    conversation_history += f"{prompt_text}{response_text}</s>\n"
    if len(conversation_history) > 2048:
        conversation_history = conversation_history[1024:]
    num_chat += 1

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf467c9e0f536bda271283c6ef85eb1a943e3196b621c8a912d64953b205df83
size 1842795

41
tokenizer_config.json Normal file
View File

@@ -0,0 +1,41 @@
{
"add_bos_token": true,
"add_eos_token": false,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<s>",
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"model_max_length": 2048,
"pad_token": "</s>",
"padding_side": "right",
"sp_model_kwargs": {},
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}

9
train_results.json Normal file
View File

@@ -0,0 +1,9 @@
{
"epoch": 1.0,
"total_flos": 9.08850744042455e+17,
"train_loss": 0.8321872255313152,
"train_runtime": 82908.8976,
"train_samples": 71495,
"train_samples_per_second": 0.862,
"train_steps_per_second": 0.072
}

455
trainer_state.json Normal file
View File

@@ -0,0 +1,455 @@
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 5958,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016784155756965426,
"grad_norm": 83914.40625,
"learning_rate": 4.916079221215173e-05,
"loss": 0.9807,
"step": 100
},
{
"epoch": 0.03356831151393085,
"grad_norm": 97513.53125,
"learning_rate": 4.832158442430346e-05,
"loss": 0.9534,
"step": 200
},
{
"epoch": 0.050352467270896276,
"grad_norm": 80779.1171875,
"learning_rate": 4.748237663645519e-05,
"loss": 0.9194,
"step": 300
},
{
"epoch": 0.0671366230278617,
"grad_norm": 71209.2421875,
"learning_rate": 4.664316884860692e-05,
"loss": 0.9223,
"step": 400
},
{
"epoch": 0.08392077878482712,
"grad_norm": 75543.140625,
"learning_rate": 4.5803961060758646e-05,
"loss": 0.9167,
"step": 500
},
{
"epoch": 0.10070493454179255,
"grad_norm": 81713.671875,
"learning_rate": 4.4964753272910375e-05,
"loss": 0.8878,
"step": 600
},
{
"epoch": 0.11748909029875797,
"grad_norm": 79699.15625,
"learning_rate": 4.4125545485062104e-05,
"loss": 0.8801,
"step": 700
},
{
"epoch": 0.1342732460557234,
"grad_norm": 57691.8359375,
"learning_rate": 4.328633769721383e-05,
"loss": 0.9157,
"step": 800
},
{
"epoch": 0.1510574018126888,
"grad_norm": 78234.8359375,
"learning_rate": 4.244712990936556e-05,
"loss": 0.8952,
"step": 900
},
{
"epoch": 0.16784155756965424,
"grad_norm": 64203.4140625,
"learning_rate": 4.160792212151729e-05,
"loss": 0.9049,
"step": 1000
},
{
"epoch": 0.18462571332661967,
"grad_norm": 63400.09375,
"learning_rate": 4.076871433366902e-05,
"loss": 0.8827,
"step": 1100
},
{
"epoch": 0.2014098690835851,
"grad_norm": 71029.0078125,
"learning_rate": 3.992950654582075e-05,
"loss": 0.9008,
"step": 1200
},
{
"epoch": 0.21819402484055053,
"grad_norm": 67397.421875,
"learning_rate": 3.9090298757972476e-05,
"loss": 0.889,
"step": 1300
},
{
"epoch": 0.23497818059751593,
"grad_norm": 63474.31640625,
"learning_rate": 3.8251090970124205e-05,
"loss": 0.8945,
"step": 1400
},
{
"epoch": 0.25176233635448136,
"grad_norm": 61201.125,
"learning_rate": 3.7411883182275934e-05,
"loss": 0.8427,
"step": 1500
},
{
"epoch": 0.2685464921114468,
"grad_norm": 77147.5390625,
"learning_rate": 3.657267539442766e-05,
"loss": 0.8659,
"step": 1600
},
{
"epoch": 0.2853306478684122,
"grad_norm": 135552.828125,
"learning_rate": 3.573346760657939e-05,
"loss": 0.8756,
"step": 1700
},
{
"epoch": 0.3021148036253776,
"grad_norm": 62637.78515625,
"learning_rate": 3.489425981873112e-05,
"loss": 0.8552,
"step": 1800
},
{
"epoch": 0.3188989593823431,
"grad_norm": 86379.7578125,
"learning_rate": 3.405505203088285e-05,
"loss": 0.8667,
"step": 1900
},
{
"epoch": 0.3356831151393085,
"grad_norm": 70539.9921875,
"learning_rate": 3.321584424303458e-05,
"loss": 0.8466,
"step": 2000
},
{
"epoch": 0.35246727089627394,
"grad_norm": 59345.5234375,
"learning_rate": 3.2376636455186307e-05,
"loss": 0.8694,
"step": 2100
},
{
"epoch": 0.36925142665323935,
"grad_norm": 72302.8828125,
"learning_rate": 3.1537428667338035e-05,
"loss": 0.8417,
"step": 2200
},
{
"epoch": 0.38603558241020475,
"grad_norm": 65996.3828125,
"learning_rate": 3.0698220879489764e-05,
"loss": 0.8473,
"step": 2300
},
{
"epoch": 0.4028197381671702,
"grad_norm": 84450.78125,
"learning_rate": 2.9859013091641493e-05,
"loss": 0.8247,
"step": 2400
},
{
"epoch": 0.4196038939241356,
"grad_norm": 69873.5390625,
"learning_rate": 2.9019805303793218e-05,
"loss": 0.8501,
"step": 2500
},
{
"epoch": 0.43638804968110106,
"grad_norm": 69021.078125,
"learning_rate": 2.818059751594495e-05,
"loss": 0.8284,
"step": 2600
},
{
"epoch": 0.45317220543806647,
"grad_norm": 75120.1171875,
"learning_rate": 2.734138972809668e-05,
"loss": 0.847,
"step": 2700
},
{
"epoch": 0.46995636119503187,
"grad_norm": 63536.984375,
"learning_rate": 2.6502181940248405e-05,
"loss": 0.8222,
"step": 2800
},
{
"epoch": 0.4867405169519973,
"grad_norm": 66156.6796875,
"learning_rate": 2.5662974152400137e-05,
"loss": 0.8227,
"step": 2900
},
{
"epoch": 0.5035246727089627,
"grad_norm": 69988.140625,
"learning_rate": 2.4823766364551865e-05,
"loss": 0.8048,
"step": 3000
},
{
"epoch": 0.5203088284659282,
"grad_norm": 71663.9921875,
"learning_rate": 2.398455857670359e-05,
"loss": 0.8351,
"step": 3100
},
{
"epoch": 0.5370929842228936,
"grad_norm": 64774.16796875,
"learning_rate": 2.3145350788855323e-05,
"loss": 0.8121,
"step": 3200
},
{
"epoch": 0.553877139979859,
"grad_norm": 63534.6015625,
"learning_rate": 2.2306143001007052e-05,
"loss": 0.824,
"step": 3300
},
{
"epoch": 0.5706612957368244,
"grad_norm": 73855.3984375,
"learning_rate": 2.1466935213158777e-05,
"loss": 0.817,
"step": 3400
},
{
"epoch": 0.5874454514937899,
"grad_norm": 68886.3984375,
"learning_rate": 2.062772742531051e-05,
"loss": 0.8078,
"step": 3500
},
{
"epoch": 0.6042296072507553,
"grad_norm": 68186.78125,
"learning_rate": 1.9788519637462235e-05,
"loss": 0.8175,
"step": 3600
},
{
"epoch": 0.6210137630077207,
"grad_norm": 74676.046875,
"learning_rate": 1.8949311849613967e-05,
"loss": 0.7929,
"step": 3700
},
{
"epoch": 0.6377979187646862,
"grad_norm": 68078.203125,
"learning_rate": 1.8110104061765696e-05,
"loss": 0.7941,
"step": 3800
},
{
"epoch": 0.6545820745216515,
"grad_norm": 66256.53125,
"learning_rate": 1.727089627391742e-05,
"loss": 0.8264,
"step": 3900
},
{
"epoch": 0.671366230278617,
"grad_norm": 71649.8125,
"learning_rate": 1.6431688486069153e-05,
"loss": 0.7731,
"step": 4000
},
{
"epoch": 0.6881503860355824,
"grad_norm": 70382.28125,
"learning_rate": 1.559248069822088e-05,
"loss": 0.8076,
"step": 4100
},
{
"epoch": 0.7049345417925479,
"grad_norm": 70303.125,
"learning_rate": 1.4753272910372609e-05,
"loss": 0.7841,
"step": 4200
},
{
"epoch": 0.7217186975495132,
"grad_norm": 62633.87109375,
"learning_rate": 1.391406512252434e-05,
"loss": 0.7781,
"step": 4300
},
{
"epoch": 0.7385028533064787,
"grad_norm": 69537.5625,
"learning_rate": 1.3074857334676067e-05,
"loss": 0.7846,
"step": 4400
},
{
"epoch": 0.7552870090634441,
"grad_norm": 65905.9765625,
"learning_rate": 1.2235649546827795e-05,
"loss": 0.7818,
"step": 4500
},
{
"epoch": 0.7720711648204095,
"grad_norm": 66610.0703125,
"learning_rate": 1.1396441758979524e-05,
"loss": 0.774,
"step": 4600
},
{
"epoch": 0.788855320577375,
"grad_norm": 73451.1953125,
"learning_rate": 1.0557233971131253e-05,
"loss": 0.7768,
"step": 4700
},
{
"epoch": 0.8056394763343404,
"grad_norm": 85429.0390625,
"learning_rate": 9.718026183282982e-06,
"loss": 0.7805,
"step": 4800
},
{
"epoch": 0.8224236320913058,
"grad_norm": 63732.640625,
"learning_rate": 8.87881839543471e-06,
"loss": 0.7685,
"step": 4900
},
{
"epoch": 0.8392077878482712,
"grad_norm": 65406.63671875,
"learning_rate": 8.039610607586439e-06,
"loss": 0.7504,
"step": 5000
},
{
"epoch": 0.8559919436052367,
"grad_norm": 74580.1484375,
"learning_rate": 7.200402819738168e-06,
"loss": 0.7814,
"step": 5100
},
{
"epoch": 0.8727760993622021,
"grad_norm": 73900.3671875,
"learning_rate": 6.361195031889897e-06,
"loss": 0.7749,
"step": 5200
},
{
"epoch": 0.8895602551191675,
"grad_norm": 77174.4375,
"learning_rate": 5.5219872440416254e-06,
"loss": 0.7862,
"step": 5300
},
{
"epoch": 0.9063444108761329,
"grad_norm": 61536.1875,
"learning_rate": 4.682779456193353e-06,
"loss": 0.7669,
"step": 5400
},
{
"epoch": 0.9231285666330984,
"grad_norm": 60535.73828125,
"learning_rate": 3.843571668345083e-06,
"loss": 0.7767,
"step": 5500
},
{
"epoch": 0.9399127223900637,
"grad_norm": 57083.25,
"learning_rate": 3.0043638804968113e-06,
"loss": 0.7772,
"step": 5600
},
{
"epoch": 0.9566968781470292,
"grad_norm": 65630.703125,
"learning_rate": 2.16515609264854e-06,
"loss": 0.7749,
"step": 5700
},
{
"epoch": 0.9734810339039947,
"grad_norm": 66215.3359375,
"learning_rate": 1.3259483048002687e-06,
"loss": 0.782,
"step": 5800
},
{
"epoch": 0.99026518966096,
"grad_norm": 72915.515625,
"learning_rate": 4.867405169519974e-07,
"loss": 0.7722,
"step": 5900
},
{
"epoch": 1.0,
"step": 5958,
"total_flos": 9.08850744042455e+17,
"train_loss": 0.8321872255313152,
"train_runtime": 82908.8976,
"train_samples_per_second": 0.862,
"train_steps_per_second": 0.072
}
],
"logging_steps": 100,
"max_steps": 5958,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5958,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.08850744042455e+17,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}

3
training_args.bin Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dbd6ecac03d543acbcd036abe674eba4f3d9c6b10013486b12a0549c7e08434e
size 5112