初始化项目,由ModelHub XC社区提供模型
Model: ssz1111/CANOE-LLaMA3-8B Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
65
README.md
Normal file
65
README.md
Normal file
@@ -0,0 +1,65 @@
|
||||
---
|
||||
library_name: transformers
|
||||
tags:
|
||||
- generated_from_trainer
|
||||
- open-r1
|
||||
license: llama3
|
||||
---
|
||||
|
||||
# Model Card for CANOE-LLaMA3-8B
|
||||
|
||||
This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct).
|
||||
It has been trained using [TRL](https://github.com/huggingface/trl).
|
||||
|
||||
## Quick start
|
||||
|
||||
```python
|
||||
from transformers import pipeline
|
||||
|
||||
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
||||
generator = pipeline("text-generation", model="ssz1111/CANOE-LLaMA3-8B", device="cuda")
|
||||
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
||||
print(output["generated_text"])
|
||||
```
|
||||
|
||||
## Training procedure
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/sishuzheng/huggingface/runs/339l70ik)
|
||||
|
||||
|
||||
This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
|
||||
|
||||
### Framework versions
|
||||
|
||||
- TRL: 0.14.0
|
||||
- Transformers: 4.49.0
|
||||
- Pytorch: 2.5.1
|
||||
- Datasets: 3.3.2
|
||||
- Tokenizers: 0.21.0
|
||||
|
||||
## Citations
|
||||
|
||||
Cite GRPO as:
|
||||
|
||||
```bibtex
|
||||
@article{zhihong2024deepseekmath,
|
||||
title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
|
||||
author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
|
||||
year = 2024,
|
||||
eprint = {arXiv:2402.03300},
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
Cite TRL as:
|
||||
|
||||
```bibtex
|
||||
@misc{vonwerra2022trl,
|
||||
title = {{TRL: Transformer Reinforcement Learning}},
|
||||
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
|
||||
year = 2020,
|
||||
journal = {GitHub repository},
|
||||
publisher = {GitHub},
|
||||
howpublished = {\url{https://github.com/huggingface/trl}}
|
||||
}
|
||||
```
|
||||
8
all_results.json
Normal file
8
all_results.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"total_flos": 0.0,
|
||||
"train_loss": 0.005316848002538006,
|
||||
"train_runtime": 54287.5063,
|
||||
"train_samples": 10000,
|
||||
"train_samples_per_second": 0.368,
|
||||
"train_steps_per_second": 0.003
|
||||
}
|
||||
30
config.json
Normal file
30
config.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"_name_or_path": "/mnt/public/share/users/sishuzheng/models/llama-3-8b-instruct",
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 128000,
|
||||
"eos_token_id": 128009,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 8192,
|
||||
"mlp_bias": false,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 32,
|
||||
"num_key_value_heads": 8,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 500000.0,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.49.0",
|
||||
"use_cache": true,
|
||||
"vocab_size": 128256
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"bos_token_id": 128000,
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
128001,
|
||||
128009
|
||||
],
|
||||
"max_length": 4096,
|
||||
"temperature": 0.6,
|
||||
"top_p": 0.9,
|
||||
"transformers_version": "4.49.0"
|
||||
}
|
||||
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d56300f97ae57f01c0027e7970245331e9918412e61f7ed645fe061704b7aa8f
|
||||
size 4976698672
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:95f68a39cc523a78b1e5bc4657736eafa420a350e9a144b6a7333da6f7da1360
|
||||
size 4999802720
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7d6c960a54cdd67a59a16cd0dfa9dbbae0ba821370f7f660c360d95e20720d77
|
||||
size 4915916176
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aa0eea76ad4235c894f2208188c181c651b8b8e01a0f3a7b5c621f4597896d3d
|
||||
size 1168138808
|
||||
298
model.safetensors.index.json
Normal file
298
model.safetensors.index.json
Normal file
@@ -0,0 +1,298 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 16060522496
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
17
special_tokens_map.json
Normal file
17
special_tokens_map.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<|begin_of_text|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|eot_id|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": "<|eot_id|>"
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8fc5ed64d17c57f61c0ef996ac8b3a8918e7d406866cc4a0292d362a31a217e4
|
||||
size 17210125
|
||||
2064
tokenizer_config.json
Normal file
2064
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
8
train_results.json
Normal file
8
train_results.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"total_flos": 0.0,
|
||||
"train_loss": 0.005316848002538006,
|
||||
"train_runtime": 54287.5063,
|
||||
"train_samples": 10000,
|
||||
"train_samples_per_second": 0.368,
|
||||
"train_steps_per_second": 0.003
|
||||
}
|
||||
590
trainer_state.json
Normal file
590
trainer_state.json
Normal file
@@ -0,0 +1,590 @@
|
||||
{
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 1.9846153846153847,
|
||||
"eval_steps": 500,
|
||||
"global_step": 178,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"completion_length": 239.35076141357422,
|
||||
"epoch": 0.011188811188811189,
|
||||
"grad_norm": 2.018390655517578,
|
||||
"kl": 0.0,
|
||||
"learning_rate": 5.555555555555555e-08,
|
||||
"loss": 0.0,
|
||||
"reward": 1.020408146083355,
|
||||
"reward_std": 0.588430143892765,
|
||||
"rewards/accuracy_reward": 0.07015305897220969,
|
||||
"rewards/format_reward": 0.3928571380674839,
|
||||
"rewards/influence_reward": 0.05229591624811292,
|
||||
"rewards/len_reward": 0.5051020309329033,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"completion_length": 240.83513402938843,
|
||||
"epoch": 0.055944055944055944,
|
||||
"grad_norm": 1.5813062191009521,
|
||||
"kl": 0.00015559792518615723,
|
||||
"learning_rate": 2.7777777777777776e-07,
|
||||
"loss": 0.0,
|
||||
"reward": 0.9544004816561937,
|
||||
"reward_std": 0.6496573686599731,
|
||||
"rewards/accuracy_reward": 0.06377550942124799,
|
||||
"rewards/format_reward": 0.39317601080983877,
|
||||
"rewards/influence_reward": 0.0484693865000736,
|
||||
"rewards/len_reward": 0.44897958217188716,
|
||||
"step": 5
|
||||
},
|
||||
{
|
||||
"completion_length": 239.87346343994142,
|
||||
"epoch": 0.11188811188811189,
|
||||
"grad_norm": 1.199715256690979,
|
||||
"kl": 0.0015879154205322265,
|
||||
"learning_rate": 5.555555555555555e-07,
|
||||
"loss": 0.0001,
|
||||
"reward": 1.2579081252217292,
|
||||
"reward_std": 0.6406066298484803,
|
||||
"rewards/accuracy_reward": 0.09897958971560002,
|
||||
"rewards/format_reward": 0.6145408108830452,
|
||||
"rewards/influence_reward": 0.06607142747379839,
|
||||
"rewards/len_reward": 0.47831631228327753,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"completion_length": 235.00101585388182,
|
||||
"epoch": 0.16783216783216784,
|
||||
"grad_norm": 0.9491918683052063,
|
||||
"kl": 0.012871551513671874,
|
||||
"learning_rate": 8.333333333333333e-07,
|
||||
"loss": 0.0005,
|
||||
"reward": 1.9201530247926712,
|
||||
"reward_std": 0.5499199964106083,
|
||||
"rewards/accuracy_reward": 0.2658163230866194,
|
||||
"rewards/format_reward": 0.9533163100481034,
|
||||
"rewards/influence_reward": 0.19158162884414195,
|
||||
"rewards/len_reward": 0.5094387628138065,
|
||||
"step": 15
|
||||
},
|
||||
{
|
||||
"completion_length": 230.20764770507813,
|
||||
"epoch": 0.22377622377622378,
|
||||
"grad_norm": 1.0375124216079712,
|
||||
"kl": 0.034588623046875,
|
||||
"learning_rate": 9.996145181203615e-07,
|
||||
"loss": 0.0015,
|
||||
"reward": 2.358928510546684,
|
||||
"reward_std": 0.4863013714551926,
|
||||
"rewards/accuracy_reward": 0.46224488839507105,
|
||||
"rewards/format_reward": 0.9905612260103226,
|
||||
"rewards/influence_reward": 0.3974489726126194,
|
||||
"rewards/len_reward": 0.5086734544485807,
|
||||
"step": 20
|
||||
},
|
||||
{
|
||||
"completion_length": 221.39234199523926,
|
||||
"epoch": 0.27972027972027974,
|
||||
"grad_norm": 1.0581080913543701,
|
||||
"kl": 0.064349365234375,
|
||||
"learning_rate": 9.952846702217885e-07,
|
||||
"loss": 0.0027,
|
||||
"reward": 2.6903060495853426,
|
||||
"reward_std": 0.4831090085208416,
|
||||
"rewards/accuracy_reward": 0.5727040730416775,
|
||||
"rewards/format_reward": 0.9933673486113548,
|
||||
"rewards/influence_reward": 0.516071417927742,
|
||||
"rewards/len_reward": 0.6081632524728775,
|
||||
"step": 25
|
||||
},
|
||||
{
|
||||
"completion_length": 223.85866737365723,
|
||||
"epoch": 0.3356643356643357,
|
||||
"grad_norm": 1.002379298210144,
|
||||
"kl": 0.0775390625,
|
||||
"learning_rate": 9.861849601988383e-07,
|
||||
"loss": 0.0032,
|
||||
"reward": 2.7147958517074584,
|
||||
"reward_std": 0.4902082525193691,
|
||||
"rewards/accuracy_reward": 0.552295907586813,
|
||||
"rewards/format_reward": 0.9910714283585549,
|
||||
"rewards/influence_reward": 0.5109693787992,
|
||||
"rewards/len_reward": 0.660459166765213,
|
||||
"step": 30
|
||||
},
|
||||
{
|
||||
"completion_length": 242.63366928100587,
|
||||
"epoch": 0.3916083916083916,
|
||||
"grad_norm": 0.8198888301849365,
|
||||
"kl": 0.07734375,
|
||||
"learning_rate": 9.72403023233439e-07,
|
||||
"loss": 0.0032,
|
||||
"reward": 2.704081577062607,
|
||||
"reward_std": 0.4683332860469818,
|
||||
"rewards/accuracy_reward": 0.5767857022583485,
|
||||
"rewards/format_reward": 0.9892857119441032,
|
||||
"rewards/influence_reward": 0.5352040722966194,
|
||||
"rewards/len_reward": 0.6028061106801033,
|
||||
"step": 35
|
||||
},
|
||||
{
|
||||
"completion_length": 219.7584140777588,
|
||||
"epoch": 0.44755244755244755,
|
||||
"grad_norm": 0.8813366889953613,
|
||||
"kl": 0.081610107421875,
|
||||
"learning_rate": 9.540715869125407e-07,
|
||||
"loss": 0.0033,
|
||||
"reward": 2.984948921203613,
|
||||
"reward_std": 0.45619520246982576,
|
||||
"rewards/accuracy_reward": 0.620918358117342,
|
||||
"rewards/format_reward": 0.9966836735606194,
|
||||
"rewards/influence_reward": 0.5757652945816517,
|
||||
"rewards/len_reward": 0.7915816128253936,
|
||||
"step": 40
|
||||
},
|
||||
{
|
||||
"completion_length": 213.20280342102052,
|
||||
"epoch": 0.5034965034965035,
|
||||
"grad_norm": 0.8298941850662231,
|
||||
"kl": 0.09119873046875,
|
||||
"learning_rate": 9.313671929888959e-07,
|
||||
"loss": 0.0037,
|
||||
"reward": 2.999489736557007,
|
||||
"reward_std": 0.4441244326531887,
|
||||
"rewards/accuracy_reward": 0.6117346778512001,
|
||||
"rewards/format_reward": 0.9974489793181419,
|
||||
"rewards/influence_reward": 0.5594387598335743,
|
||||
"rewards/len_reward": 0.8308673277497292,
|
||||
"step": 45
|
||||
},
|
||||
{
|
||||
"completion_length": 239.8193817138672,
|
||||
"epoch": 0.5594405594405595,
|
||||
"grad_norm": 0.8936271667480469,
|
||||
"kl": 0.10096435546875,
|
||||
"learning_rate": 9.045084971874737e-07,
|
||||
"loss": 0.0041,
|
||||
"reward": 2.906377512216568,
|
||||
"reward_std": 0.45472528263926504,
|
||||
"rewards/accuracy_reward": 0.5836734607815742,
|
||||
"rewards/format_reward": 0.9956632673740387,
|
||||
"rewards/influence_reward": 0.5321428462862968,
|
||||
"rewards/len_reward": 0.7948979437351227,
|
||||
"step": 50
|
||||
},
|
||||
{
|
||||
"completion_length": 230.65917854309083,
|
||||
"epoch": 0.6153846153846154,
|
||||
"grad_norm": 0.7805132865905762,
|
||||
"kl": 0.1081298828125,
|
||||
"learning_rate": 8.737541634312983e-07,
|
||||
"loss": 0.0043,
|
||||
"reward": 3.0579080879688263,
|
||||
"reward_std": 0.4420431960374117,
|
||||
"rewards/accuracy_reward": 0.607397947460413,
|
||||
"rewards/format_reward": 0.9959183692932129,
|
||||
"rewards/influence_reward": 0.5612244814634323,
|
||||
"rewards/len_reward": 0.8933673277497292,
|
||||
"step": 55
|
||||
},
|
||||
{
|
||||
"completion_length": 220.4063720703125,
|
||||
"epoch": 0.6713286713286714,
|
||||
"grad_norm": 1.4386658668518066,
|
||||
"kl": 0.1181640625,
|
||||
"learning_rate": 8.394003727664709e-07,
|
||||
"loss": 0.0047,
|
||||
"reward": 3.0977040231227875,
|
||||
"reward_std": 0.4241327825933695,
|
||||
"rewards/accuracy_reward": 0.6344387613236904,
|
||||
"rewards/format_reward": 0.9956632673740387,
|
||||
"rewards/influence_reward": 0.5821428462862969,
|
||||
"rewards/len_reward": 0.8854591652750969,
|
||||
"step": 60
|
||||
},
|
||||
{
|
||||
"completion_length": 228.13443336486816,
|
||||
"epoch": 0.7272727272727273,
|
||||
"grad_norm": 0.6940521597862244,
|
||||
"kl": 0.11019287109375,
|
||||
"learning_rate": 8.017779709767857e-07,
|
||||
"loss": 0.0044,
|
||||
"reward": 2.9755101561546327,
|
||||
"reward_std": 0.4135630540549755,
|
||||
"rewards/accuracy_reward": 0.5908163137733936,
|
||||
"rewards/format_reward": 0.9948979616165161,
|
||||
"rewards/influence_reward": 0.5410714164376259,
|
||||
"rewards/len_reward": 0.8487244695425034,
|
||||
"step": 65
|
||||
},
|
||||
{
|
||||
"completion_length": 217.46045417785643,
|
||||
"epoch": 0.7832167832167832,
|
||||
"grad_norm": 0.6972203254699707,
|
||||
"kl": 0.1288818359375,
|
||||
"learning_rate": 7.612492823579744e-07,
|
||||
"loss": 0.0052,
|
||||
"reward": 3.136479526758194,
|
||||
"reward_std": 0.42438863664865495,
|
||||
"rewards/accuracy_reward": 0.6517856940627098,
|
||||
"rewards/format_reward": 0.9956632673740387,
|
||||
"rewards/influence_reward": 0.6112244755029679,
|
||||
"rewards/len_reward": 0.8778061032295227,
|
||||
"step": 70
|
||||
},
|
||||
{
|
||||
"completion_length": 217.32397499084473,
|
||||
"epoch": 0.8391608391608392,
|
||||
"grad_norm": 0.995740532875061,
|
||||
"kl": 0.13599853515625,
|
||||
"learning_rate": 7.182046203366709e-07,
|
||||
"loss": 0.0055,
|
||||
"reward": 3.1479591190814973,
|
||||
"reward_std": 0.4072150893509388,
|
||||
"rewards/accuracy_reward": 0.6653061121702194,
|
||||
"rewards/format_reward": 0.9959183692932129,
|
||||
"rewards/influence_reward": 0.6081632517278195,
|
||||
"rewards/len_reward": 0.8785714104771614,
|
||||
"step": 75
|
||||
},
|
||||
{
|
||||
"completion_length": 204.29948654174805,
|
||||
"epoch": 0.8951048951048951,
|
||||
"grad_norm": 0.8313683867454529,
|
||||
"kl": 0.1267333984375,
|
||||
"learning_rate": 6.730585285387465e-07,
|
||||
"loss": 0.0051,
|
||||
"reward": 3.1954080879688265,
|
||||
"reward_std": 0.39486792534589765,
|
||||
"rewards/accuracy_reward": 0.6806122347712517,
|
||||
"rewards/format_reward": 0.9956632673740387,
|
||||
"rewards/influence_reward": 0.6438775353133679,
|
||||
"rewards/len_reward": 0.8752550885081292,
|
||||
"step": 80
|
||||
},
|
||||
{
|
||||
"completion_length": 203.56530113220214,
|
||||
"epoch": 0.951048951048951,
|
||||
"grad_norm": 0.7973130345344543,
|
||||
"kl": 0.126806640625,
|
||||
"learning_rate": 6.262457885075789e-07,
|
||||
"loss": 0.0051,
|
||||
"reward": 3.2344387233257295,
|
||||
"reward_std": 0.3648144776001573,
|
||||
"rewards/accuracy_reward": 0.6961734496057034,
|
||||
"rewards/format_reward": 0.9956632673740387,
|
||||
"rewards/influence_reward": 0.6497448846697808,
|
||||
"rewards/len_reward": 0.8928571224212647,
|
||||
"step": 85
|
||||
},
|
||||
{
|
||||
"completion_length": 221.41661420549664,
|
||||
"epoch": 1.0,
|
||||
"grad_norm": 1.3692800998687744,
|
||||
"kl": 0.13193359375,
|
||||
"learning_rate": 5.782172325201155e-07,
|
||||
"loss": 0.0046,
|
||||
"reward": 3.196792949948992,
|
||||
"reward_std": 0.3790252791983741,
|
||||
"rewards/accuracy_reward": 0.676093282018389,
|
||||
"rewards/format_reward": 0.9962099126407078,
|
||||
"rewards/influence_reward": 0.6265305961881366,
|
||||
"rewards/len_reward": 0.897959165913718,
|
||||
"step": 90
|
||||
},
|
||||
{
|
||||
"completion_length": 228.48239212036134,
|
||||
"epoch": 1.055944055944056,
|
||||
"grad_norm": 0.9810852408409119,
|
||||
"kl": 0.14306640625,
|
||||
"learning_rate": 5.294354018255944e-07,
|
||||
"loss": 0.0057,
|
||||
"reward": 3.2369897425174714,
|
||||
"reward_std": 0.3675705246627331,
|
||||
"rewards/accuracy_reward": 0.6959183529019356,
|
||||
"rewards/format_reward": 0.9979591846466065,
|
||||
"rewards/influence_reward": 0.6489795804023742,
|
||||
"rewards/len_reward": 0.8941326335072517,
|
||||
"step": 95
|
||||
},
|
||||
{
|
||||
"completion_length": 228.94004592895507,
|
||||
"epoch": 1.1118881118881119,
|
||||
"grad_norm": 0.814385712146759,
|
||||
"kl": 0.14249267578125,
|
||||
"learning_rate": 4.803700921204658e-07,
|
||||
"loss": 0.0057,
|
||||
"reward": 3.299489712715149,
|
||||
"reward_std": 0.36674671024084093,
|
||||
"rewards/accuracy_reward": 0.7234693765640259,
|
||||
"rewards/format_reward": 0.9969387769699096,
|
||||
"rewards/influence_reward": 0.6719387613236905,
|
||||
"rewards/len_reward": 0.9071428373456001,
|
||||
"step": 100
|
||||
},
|
||||
{
|
||||
"completion_length": 227.59565849304198,
|
||||
"epoch": 1.167832167832168,
|
||||
"grad_norm": 1.3440909385681152,
|
||||
"kl": 0.15057373046875,
|
||||
"learning_rate": 4.3149382915901606e-07,
|
||||
"loss": 0.006,
|
||||
"reward": 3.2344387233257295,
|
||||
"reward_std": 0.3843592546880245,
|
||||
"rewards/accuracy_reward": 0.6931122295558453,
|
||||
"rewards/format_reward": 0.9966836735606194,
|
||||
"rewards/influence_reward": 0.6403061114251614,
|
||||
"rewards/len_reward": 0.9043367177248001,
|
||||
"step": 105
|
||||
},
|
||||
{
|
||||
"completion_length": 220.42397651672363,
|
||||
"epoch": 1.2237762237762237,
|
||||
"grad_norm": 1.2474172115325928,
|
||||
"kl": 0.1584716796875,
|
||||
"learning_rate": 3.8327731807204744e-07,
|
||||
"loss": 0.0063,
|
||||
"reward": 3.296428495645523,
|
||||
"reward_std": 0.38065838664770124,
|
||||
"rewards/accuracy_reward": 0.7209183543920517,
|
||||
"rewards/format_reward": 0.9974489808082581,
|
||||
"rewards/influence_reward": 0.6640305913984775,
|
||||
"rewards/len_reward": 0.9140305906534195,
|
||||
"step": 110
|
||||
},
|
||||
{
|
||||
"completion_length": 228.10331192016602,
|
||||
"epoch": 1.2797202797202798,
|
||||
"grad_norm": 1.0222375392913818,
|
||||
"kl": 0.17257080078125,
|
||||
"learning_rate": 3.361849102191533e-07,
|
||||
"loss": 0.0069,
|
||||
"reward": 3.247704017162323,
|
||||
"reward_std": 0.37765960246324537,
|
||||
"rewards/accuracy_reward": 0.698979577422142,
|
||||
"rewards/format_reward": 0.9948979601264,
|
||||
"rewards/influence_reward": 0.6392856992781162,
|
||||
"rewards/len_reward": 0.9145407944917678,
|
||||
"step": 115
|
||||
},
|
||||
{
|
||||
"completion_length": 212.8178524017334,
|
||||
"epoch": 1.3356643356643356,
|
||||
"grad_norm": 0.9680814146995544,
|
||||
"kl": 0.14952392578125,
|
||||
"learning_rate": 2.906701312312861e-07,
|
||||
"loss": 0.006,
|
||||
"reward": 3.277550941705704,
|
||||
"reward_std": 0.36851916685700414,
|
||||
"rewards/accuracy_reward": 0.7150510035455226,
|
||||
"rewards/format_reward": 0.9966836750507355,
|
||||
"rewards/influence_reward": 0.659948968142271,
|
||||
"rewards/len_reward": 0.9058673277497291,
|
||||
"step": 120
|
||||
},
|
||||
{
|
||||
"completion_length": 218.52907676696776,
|
||||
"epoch": 1.3916083916083917,
|
||||
"grad_norm": 0.8980757594108582,
|
||||
"kl": 0.144287109375,
|
||||
"learning_rate": 2.4717131331100774e-07,
|
||||
"loss": 0.0058,
|
||||
"reward": 3.2676019430160523,
|
||||
"reward_std": 0.34619110673666,
|
||||
"rewards/accuracy_reward": 0.7015306010842324,
|
||||
"rewards/format_reward": 0.9969387769699096,
|
||||
"rewards/influence_reward": 0.6543367207050323,
|
||||
"rewards/len_reward": 0.9147959008812905,
|
||||
"step": 125
|
||||
},
|
||||
{
|
||||
"completion_length": 220.4456588745117,
|
||||
"epoch": 1.4475524475524475,
|
||||
"grad_norm": 1.6906476020812988,
|
||||
"kl": 0.1458251953125,
|
||||
"learning_rate": 2.0610737385376348e-07,
|
||||
"loss": 0.0058,
|
||||
"reward": 3.244897884130478,
|
||||
"reward_std": 0.34559424556791785,
|
||||
"rewards/accuracy_reward": 0.7017857000231743,
|
||||
"rewards/format_reward": 0.9964285731315613,
|
||||
"rewards/influence_reward": 0.648724476993084,
|
||||
"rewards/len_reward": 0.8979591608047486,
|
||||
"step": 130
|
||||
},
|
||||
{
|
||||
"completion_length": 222.28366889953614,
|
||||
"epoch": 1.5034965034965035,
|
||||
"grad_norm": 2.0042786598205566,
|
||||
"kl": 0.15557861328125,
|
||||
"learning_rate": 1.6787378104435929e-07,
|
||||
"loss": 0.0062,
|
||||
"reward": 3.2635203421115877,
|
||||
"reward_std": 0.3759432673454285,
|
||||
"rewards/accuracy_reward": 0.7086734533309936,
|
||||
"rewards/format_reward": 0.9946428582072258,
|
||||
"rewards/influence_reward": 0.6635203965008258,
|
||||
"rewards/len_reward": 0.8966836482286453,
|
||||
"step": 135
|
||||
},
|
||||
{
|
||||
"completion_length": 226.27831115722657,
|
||||
"epoch": 1.5594405594405596,
|
||||
"grad_norm": 1.0714577436447144,
|
||||
"kl": 0.14898681640625,
|
||||
"learning_rate": 1.3283874528215733e-07,
|
||||
"loss": 0.006,
|
||||
"reward": 3.270918291807175,
|
||||
"reward_std": 0.3628778774291277,
|
||||
"rewards/accuracy_reward": 0.713775496929884,
|
||||
"rewards/format_reward": 0.9964285716414452,
|
||||
"rewards/influence_reward": 0.6655612140893936,
|
||||
"rewards/len_reward": 0.8951530426740646,
|
||||
"step": 140
|
||||
},
|
||||
{
|
||||
"completion_length": 216.52473983764648,
|
||||
"epoch": 1.6153846153846154,
|
||||
"grad_norm": 1.0027109384536743,
|
||||
"kl": 0.1531005859375,
|
||||
"learning_rate": 1.013396731136465e-07,
|
||||
"loss": 0.0061,
|
||||
"reward": 3.3109693050384523,
|
||||
"reward_std": 0.3781158674508333,
|
||||
"rewards/accuracy_reward": 0.7298469215631485,
|
||||
"rewards/format_reward": 0.9979591846466065,
|
||||
"rewards/influence_reward": 0.6729591734707355,
|
||||
"rewards/len_reward": 0.9102040633559227,
|
||||
"step": 145
|
||||
},
|
||||
{
|
||||
"completion_length": 214.229333114624,
|
||||
"epoch": 1.6713286713286712,
|
||||
"grad_norm": 1.2131189107894897,
|
||||
"kl": 0.16943359375,
|
||||
"learning_rate": 7.36799178229539e-08,
|
||||
"loss": 0.0068,
|
||||
"reward": 3.2859693229198457,
|
||||
"reward_std": 0.35086961574852465,
|
||||
"rewards/accuracy_reward": 0.7020408011972904,
|
||||
"rewards/format_reward": 0.9961734697222709,
|
||||
"rewards/influence_reward": 0.6668367192149163,
|
||||
"rewards/len_reward": 0.920918345451355,
|
||||
"step": 150
|
||||
},
|
||||
{
|
||||
"completion_length": 218.42678260803223,
|
||||
"epoch": 1.7272727272727273,
|
||||
"grad_norm": 28.282617568969727,
|
||||
"kl": 0.187109375,
|
||||
"learning_rate": 5.012585797388935e-08,
|
||||
"loss": 0.0075,
|
||||
"reward": 3.2318876922130584,
|
||||
"reward_std": 0.35199854988604784,
|
||||
"rewards/accuracy_reward": 0.6829081475734711,
|
||||
"rewards/format_reward": 0.9956632673740387,
|
||||
"rewards/influence_reward": 0.6392856985330582,
|
||||
"rewards/len_reward": 0.9140305936336517,
|
||||
"step": 155
|
||||
},
|
||||
{
|
||||
"completion_length": 215.96785354614258,
|
||||
"epoch": 1.7832167832167833,
|
||||
"grad_norm": 1.0345922708511353,
|
||||
"kl": 0.17052001953125,
|
||||
"learning_rate": 3.0904332038757974e-08,
|
||||
"loss": 0.0068,
|
||||
"reward": 3.295152986049652,
|
||||
"reward_std": 0.3517200522124767,
|
||||
"rewards/accuracy_reward": 0.7298469223082066,
|
||||
"rewards/format_reward": 0.9943877562880516,
|
||||
"rewards/influence_reward": 0.6688775345683098,
|
||||
"rewards/len_reward": 0.9020407944917679,
|
||||
"step": 160
|
||||
},
|
||||
{
|
||||
"completion_length": 220.99591369628905,
|
||||
"epoch": 1.8391608391608392,
|
||||
"grad_norm": 0.9470728039741516,
|
||||
"kl": 0.16759033203125,
|
||||
"learning_rate": 1.6200453819870118e-08,
|
||||
"loss": 0.0067,
|
||||
"reward": 3.3061223804950712,
|
||||
"reward_std": 0.3646129764616489,
|
||||
"rewards/accuracy_reward": 0.7349489614367485,
|
||||
"rewards/format_reward": 0.9943877577781677,
|
||||
"rewards/influence_reward": 0.6849489629268646,
|
||||
"rewards/len_reward": 0.8918367147445678,
|
||||
"step": 165
|
||||
},
|
||||
{
|
||||
"completion_length": 212.3209140777588,
|
||||
"epoch": 1.895104895104895,
|
||||
"grad_norm": 1.04275381565094,
|
||||
"kl": 0.53802490234375,
|
||||
"learning_rate": 6.15582970243117e-09,
|
||||
"loss": 0.0216,
|
||||
"reward": 3.295663195848465,
|
||||
"reward_std": 0.35336949974298476,
|
||||
"rewards/accuracy_reward": 0.7135203927755356,
|
||||
"rewards/format_reward": 0.9966836735606194,
|
||||
"rewards/influence_reward": 0.6688775405287742,
|
||||
"rewards/len_reward": 0.9165816113352776,
|
||||
"step": 170
|
||||
},
|
||||
{
|
||||
"completion_length": 214.2193832397461,
|
||||
"epoch": 1.951048951048951,
|
||||
"grad_norm": 1.4784653186798096,
|
||||
"kl": 0.1680908203125,
|
||||
"learning_rate": 8.671949076420881e-10,
|
||||
"loss": 0.0067,
|
||||
"reward": 3.292857068777084,
|
||||
"reward_std": 0.3482844814658165,
|
||||
"rewards/accuracy_reward": 0.7214285537600518,
|
||||
"rewards/format_reward": 0.9946428582072258,
|
||||
"rewards/influence_reward": 0.6732142709195614,
|
||||
"rewards/len_reward": 0.9035714089870452,
|
||||
"step": 175
|
||||
},
|
||||
{
|
||||
"completion_length": 220.2002493540446,
|
||||
"epoch": 1.9846153846153847,
|
||||
"kl": 0.19038899739583334,
|
||||
"reward": 3.2593536575635276,
|
||||
"reward_std": 0.3590250660975774,
|
||||
"rewards/accuracy_reward": 0.7032312775651614,
|
||||
"rewards/format_reward": 0.9940476194024086,
|
||||
"rewards/influence_reward": 0.6611394360661507,
|
||||
"rewards/len_reward": 0.9009353543321291,
|
||||
"step": 178,
|
||||
"total_flos": 0.0,
|
||||
"train_loss": 0.005316848002538006,
|
||||
"train_runtime": 54287.5063,
|
||||
"train_samples_per_second": 0.368,
|
||||
"train_steps_per_second": 0.003
|
||||
}
|
||||
],
|
||||
"logging_steps": 5,
|
||||
"max_steps": 178,
|
||||
"num_input_tokens_seen": 0,
|
||||
"num_train_epochs": 2,
|
||||
"save_steps": 500,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": false,
|
||||
"should_training_stop": false
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 0.0,
|
||||
"train_batch_size": 2,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:666c4ecfde782b2527fb68eea916d188daf42e9641d4570af2936d60f6200392
|
||||
size 7288
|
||||
Reference in New Issue
Block a user