初始化项目,由ModelHub XC社区提供模型
Model: infiniV/ml-intern-smoke-test Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
58
README.md
Normal file
58
README.md
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
---
|
||||||
|
base_model: HuggingFaceTB/SmolLM2-135M
|
||||||
|
library_name: transformers
|
||||||
|
model_name: ml-intern-smoke-test
|
||||||
|
tags:
|
||||||
|
- generated_from_trainer
|
||||||
|
- sft
|
||||||
|
- trl
|
||||||
|
licence: license
|
||||||
|
---
|
||||||
|
|
||||||
|
# Model Card for ml-intern-smoke-test
|
||||||
|
|
||||||
|
This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M).
|
||||||
|
It has been trained using [TRL](https://github.com/huggingface/trl).
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
```python
|
||||||
|
from transformers import pipeline
|
||||||
|
|
||||||
|
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
||||||
|
generator = pipeline("text-generation", model="infiniV/ml-intern-smoke-test", device="cuda")
|
||||||
|
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
||||||
|
print(output["generated_text"])
|
||||||
|
```
|
||||||
|
|
||||||
|
## Training procedure
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
This model was trained with SFT.
|
||||||
|
|
||||||
|
### Framework versions
|
||||||
|
|
||||||
|
- TRL: 1.2.0
|
||||||
|
- Transformers: 5.6.2
|
||||||
|
- PyTorch: 2.6.0+cu124
|
||||||
|
- Datasets: 4.8.4
|
||||||
|
- Tokenizers: 0.22.2
|
||||||
|
|
||||||
|
## Citations
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Cite TRL as:
|
||||||
|
|
||||||
|
```bibtex
|
||||||
|
@software{vonwerra2020trl,
|
||||||
|
title = {{TRL: Transformers Reinforcement Learning}},
|
||||||
|
author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
|
||||||
|
license = {Apache-2.0},
|
||||||
|
url = {https://github.com/huggingface/trl},
|
||||||
|
year = {2020}
|
||||||
|
}
|
||||||
|
```
|
||||||
34
config.json
Normal file
34
config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"LlamaForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 0,
|
||||||
|
"dtype": "float32",
|
||||||
|
"eos_token_id": 0,
|
||||||
|
"head_dim": 64,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 576,
|
||||||
|
"initializer_range": 0.041666666666666664,
|
||||||
|
"intermediate_size": 1536,
|
||||||
|
"is_llama_config": true,
|
||||||
|
"max_position_embeddings": 8192,
|
||||||
|
"mlp_bias": false,
|
||||||
|
"model_type": "llama",
|
||||||
|
"num_attention_heads": 9,
|
||||||
|
"num_hidden_layers": 30,
|
||||||
|
"num_key_value_heads": 3,
|
||||||
|
"pad_token_id": 0,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_interleaved": false,
|
||||||
|
"rope_parameters": {
|
||||||
|
"rope_theta": 100000,
|
||||||
|
"rope_type": "default"
|
||||||
|
},
|
||||||
|
"tie_word_embeddings": true,
|
||||||
|
"transformers_version": "5.6.2",
|
||||||
|
"use_cache": false,
|
||||||
|
"vocab_size": 49152
|
||||||
|
}
|
||||||
9
generation_config.json
Normal file
9
generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 0,
|
||||||
|
"eos_token_id": [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"pad_token_id": 0,
|
||||||
|
"transformers_version": "5.6.2"
|
||||||
|
}
|
||||||
34
last-checkpoint/config.json
Normal file
34
last-checkpoint/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"LlamaForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 0,
|
||||||
|
"dtype": "float32",
|
||||||
|
"eos_token_id": 0,
|
||||||
|
"head_dim": 64,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 576,
|
||||||
|
"initializer_range": 0.041666666666666664,
|
||||||
|
"intermediate_size": 1536,
|
||||||
|
"is_llama_config": true,
|
||||||
|
"max_position_embeddings": 8192,
|
||||||
|
"mlp_bias": false,
|
||||||
|
"model_type": "llama",
|
||||||
|
"num_attention_heads": 9,
|
||||||
|
"num_hidden_layers": 30,
|
||||||
|
"num_key_value_heads": 3,
|
||||||
|
"pad_token_id": 0,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_interleaved": false,
|
||||||
|
"rope_parameters": {
|
||||||
|
"rope_theta": 100000,
|
||||||
|
"rope_type": "default"
|
||||||
|
},
|
||||||
|
"tie_word_embeddings": true,
|
||||||
|
"transformers_version": "5.6.2",
|
||||||
|
"use_cache": false,
|
||||||
|
"vocab_size": 49152
|
||||||
|
}
|
||||||
9
last-checkpoint/generation_config.json
Normal file
9
last-checkpoint/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 0,
|
||||||
|
"eos_token_id": [
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"pad_token_id": 0,
|
||||||
|
"transformers_version": "5.6.2"
|
||||||
|
}
|
||||||
3
last-checkpoint/model.safetensors
Normal file
3
last-checkpoint/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:46449eca4f765a9ea030700fe2088d993e5c32a265f8954379b815e814f56b4d
|
||||||
|
size 538090408
|
||||||
3
last-checkpoint/optimizer.pt
Normal file
3
last-checkpoint/optimizer.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d69a1ae1e89a6a95c4250947267c2db31aac42cc65aa816e0ee3734cfd9e5eb1
|
||||||
|
size 1076349050
|
||||||
3
last-checkpoint/rng_state.pth
Normal file
3
last-checkpoint/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:2b66e3cc7c452b707ddac5caf0aa17618afb9bc1a0333600a22c4afb353f3165
|
||||||
|
size 14244
|
||||||
3
last-checkpoint/scaler.pt
Normal file
3
last-checkpoint/scaler.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:11c2ec697a3b8b5a4895af88c59dbba480386a7d5b8df6ae55e6659177ce0be4
|
||||||
|
size 988
|
||||||
3
last-checkpoint/scheduler.pt
Normal file
3
last-checkpoint/scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:a36d45a540b06d003e6e4989b2d794c797e01ca5c38de986fb353c8781098149
|
||||||
|
size 1064
|
||||||
244965
last-checkpoint/tokenizer.json
Normal file
244965
last-checkpoint/tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
34
last-checkpoint/tokenizer_config.json
Normal file
34
last-checkpoint/tokenizer_config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"add_prefix_space": false,
|
||||||
|
"backend": "tokenizers",
|
||||||
|
"bos_token": "<|endoftext|>",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "<|endoftext|>",
|
||||||
|
"errors": "replace",
|
||||||
|
"extra_special_tokens": [
|
||||||
|
"<|endoftext|>",
|
||||||
|
"<|im_start|>",
|
||||||
|
"<|im_end|>",
|
||||||
|
"<repo_name>",
|
||||||
|
"<reponame>",
|
||||||
|
"<file_sep>",
|
||||||
|
"<filename>",
|
||||||
|
"<gh_stars>",
|
||||||
|
"<issue_start>",
|
||||||
|
"<issue_comment>",
|
||||||
|
"<issue_closed>",
|
||||||
|
"<jupyter_start>",
|
||||||
|
"<jupyter_text>",
|
||||||
|
"<jupyter_code>",
|
||||||
|
"<jupyter_output>",
|
||||||
|
"<jupyter_script>",
|
||||||
|
"<empty_output>"
|
||||||
|
],
|
||||||
|
"is_local": false,
|
||||||
|
"local_files_only": false,
|
||||||
|
"model_max_length": 8192,
|
||||||
|
"pad_token": "<|endoftext|>",
|
||||||
|
"tokenizer_class": "GPT2Tokenizer",
|
||||||
|
"unk_token": "<|endoftext|>",
|
||||||
|
"vocab_size": 49152
|
||||||
|
}
|
||||||
284
last-checkpoint/trainer_state.json
Normal file
284
last-checkpoint/trainer_state.json
Normal file
@@ -0,0 +1,284 @@
|
|||||||
|
{
|
||||||
|
"best_global_step": null,
|
||||||
|
"best_metric": null,
|
||||||
|
"best_model_checkpoint": null,
|
||||||
|
"epoch": 1.4705882352941178,
|
||||||
|
"eval_steps": 500,
|
||||||
|
"global_step": 25,
|
||||||
|
"is_hyper_param_search": false,
|
||||||
|
"is_local_process_zero": true,
|
||||||
|
"is_world_process_zero": true,
|
||||||
|
"log_history": [
|
||||||
|
{
|
||||||
|
"entropy": 3.6089425086975098,
|
||||||
|
"epoch": 0.058823529411764705,
|
||||||
|
"grad_norm": NaN,
|
||||||
|
"learning_rate": 0.0,
|
||||||
|
"loss": 6.292151927947998,
|
||||||
|
"mean_token_accuracy": 0.0,
|
||||||
|
"num_tokens": 15.0,
|
||||||
|
"step": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 5.715427875518799,
|
||||||
|
"epoch": 0.11764705882352941,
|
||||||
|
"grad_norm": Infinity,
|
||||||
|
"learning_rate": 0.0,
|
||||||
|
"loss": 6.3516693115234375,
|
||||||
|
"mean_token_accuracy": 0.0,
|
||||||
|
"num_tokens": 22.0,
|
||||||
|
"step": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 6.061905860900879,
|
||||||
|
"epoch": 0.17647058823529413,
|
||||||
|
"grad_norm": NaN,
|
||||||
|
"learning_rate": 0.0,
|
||||||
|
"loss": 4.695749282836914,
|
||||||
|
"mean_token_accuracy": 0.3333333432674408,
|
||||||
|
"num_tokens": 29.0,
|
||||||
|
"step": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 5.138950824737549,
|
||||||
|
"epoch": 0.23529411764705882,
|
||||||
|
"grad_norm": 34.225440979003906,
|
||||||
|
"learning_rate": 0.0,
|
||||||
|
"loss": 4.924757957458496,
|
||||||
|
"mean_token_accuracy": 0.375,
|
||||||
|
"num_tokens": 41.0,
|
||||||
|
"step": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 5.107641220092773,
|
||||||
|
"epoch": 0.29411764705882354,
|
||||||
|
"grad_norm": Infinity,
|
||||||
|
"learning_rate": 4.000000000000001e-06,
|
||||||
|
"loss": 4.769550800323486,
|
||||||
|
"mean_token_accuracy": 0.0,
|
||||||
|
"num_tokens": 47.0,
|
||||||
|
"step": 5
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.351223945617676,
|
||||||
|
"epoch": 0.35294117647058826,
|
||||||
|
"grad_norm": 26.4381046295166,
|
||||||
|
"learning_rate": 4.000000000000001e-06,
|
||||||
|
"loss": 2.740773916244507,
|
||||||
|
"mean_token_accuracy": 0.4285714328289032,
|
||||||
|
"num_tokens": 62.0,
|
||||||
|
"step": 6
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.605663299560547,
|
||||||
|
"epoch": 0.4117647058823529,
|
||||||
|
"grad_norm": 44.312320709228516,
|
||||||
|
"learning_rate": 8.000000000000001e-06,
|
||||||
|
"loss": 4.73244571685791,
|
||||||
|
"mean_token_accuracy": 0.25,
|
||||||
|
"num_tokens": 70.0,
|
||||||
|
"step": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.652010440826416,
|
||||||
|
"epoch": 0.47058823529411764,
|
||||||
|
"grad_norm": 29.16073989868164,
|
||||||
|
"learning_rate": 1.2e-05,
|
||||||
|
"loss": 3.551349639892578,
|
||||||
|
"mean_token_accuracy": 0.3333333432674408,
|
||||||
|
"num_tokens": 83.0,
|
||||||
|
"step": 8
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 3.92993426322937,
|
||||||
|
"epoch": 0.5294117647058824,
|
||||||
|
"grad_norm": 24.797420501708984,
|
||||||
|
"learning_rate": 1.6000000000000003e-05,
|
||||||
|
"loss": 3.086703300476074,
|
||||||
|
"mean_token_accuracy": 0.5454545617103577,
|
||||||
|
"num_tokens": 101.0,
|
||||||
|
"step": 9
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.687472343444824,
|
||||||
|
"epoch": 0.5882352941176471,
|
||||||
|
"grad_norm": 65.40557098388672,
|
||||||
|
"learning_rate": 2e-05,
|
||||||
|
"loss": 6.584832668304443,
|
||||||
|
"mean_token_accuracy": 0.0,
|
||||||
|
"num_tokens": 106.0,
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 5.460619926452637,
|
||||||
|
"epoch": 0.6470588235294118,
|
||||||
|
"grad_norm": 62.57985305786133,
|
||||||
|
"learning_rate": 1.9975640502598243e-05,
|
||||||
|
"loss": 5.282391548156738,
|
||||||
|
"mean_token_accuracy": 0.3333333432674408,
|
||||||
|
"num_tokens": 111.0,
|
||||||
|
"step": 11
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 5.458609580993652,
|
||||||
|
"epoch": 0.7058823529411765,
|
||||||
|
"grad_norm": 47.68376922607422,
|
||||||
|
"learning_rate": 1.9902680687415704e-05,
|
||||||
|
"loss": 3.4104318618774414,
|
||||||
|
"mean_token_accuracy": 0.5,
|
||||||
|
"num_tokens": 118.0,
|
||||||
|
"step": 12
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 5.297245979309082,
|
||||||
|
"epoch": 0.7647058823529411,
|
||||||
|
"grad_norm": 27.625783920288086,
|
||||||
|
"learning_rate": 1.9781476007338058e-05,
|
||||||
|
"loss": 2.5839743614196777,
|
||||||
|
"mean_token_accuracy": 0.6000000238418579,
|
||||||
|
"num_tokens": 126.0,
|
||||||
|
"step": 13
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.2264180183410645,
|
||||||
|
"epoch": 0.8235294117647058,
|
||||||
|
"grad_norm": 21.131492614746094,
|
||||||
|
"learning_rate": 1.961261695938319e-05,
|
||||||
|
"loss": 2.5414154529571533,
|
||||||
|
"mean_token_accuracy": 0.5714285969734192,
|
||||||
|
"num_tokens": 138.0,
|
||||||
|
"step": 14
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.7774658203125,
|
||||||
|
"epoch": 0.8823529411764706,
|
||||||
|
"grad_norm": 40.83763885498047,
|
||||||
|
"learning_rate": 1.9396926207859085e-05,
|
||||||
|
"loss": 2.8697211742401123,
|
||||||
|
"mean_token_accuracy": 0.4000000059604645,
|
||||||
|
"num_tokens": 145.0,
|
||||||
|
"step": 15
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 5.260831832885742,
|
||||||
|
"epoch": 0.9411764705882353,
|
||||||
|
"grad_norm": 40.107913970947266,
|
||||||
|
"learning_rate": 1.913545457642601e-05,
|
||||||
|
"loss": 2.11948561668396,
|
||||||
|
"mean_token_accuracy": 0.3333333432674408,
|
||||||
|
"num_tokens": 153.0,
|
||||||
|
"step": 16
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 5.279318332672119,
|
||||||
|
"epoch": 1.0,
|
||||||
|
"grad_norm": 34.499481201171875,
|
||||||
|
"learning_rate": 1.8829475928589272e-05,
|
||||||
|
"loss": 2.028179883956909,
|
||||||
|
"mean_token_accuracy": 0.5,
|
||||||
|
"num_tokens": 161.0,
|
||||||
|
"step": 17
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 3.6013762950897217,
|
||||||
|
"epoch": 1.0588235294117647,
|
||||||
|
"grad_norm": 45.223697662353516,
|
||||||
|
"learning_rate": 1.848048096156426e-05,
|
||||||
|
"loss": 4.955894470214844,
|
||||||
|
"mean_token_accuracy": 0.3333333432674408,
|
||||||
|
"num_tokens": 176.0,
|
||||||
|
"step": 18
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.529469966888428,
|
||||||
|
"epoch": 1.1176470588235294,
|
||||||
|
"grad_norm": 30.39960479736328,
|
||||||
|
"learning_rate": 1.8090169943749477e-05,
|
||||||
|
"loss": 1.4438493251800537,
|
||||||
|
"mean_token_accuracy": 0.6000000238418579,
|
||||||
|
"num_tokens": 184.0,
|
||||||
|
"step": 19
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 3.973909378051758,
|
||||||
|
"epoch": 1.1764705882352942,
|
||||||
|
"grad_norm": 18.265600204467773,
|
||||||
|
"learning_rate": 1.766044443118978e-05,
|
||||||
|
"loss": 1.7491637468338013,
|
||||||
|
"mean_token_accuracy": 0.7142857313156128,
|
||||||
|
"num_tokens": 196.0,
|
||||||
|
"step": 20
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.81941556930542,
|
||||||
|
"epoch": 1.2352941176470589,
|
||||||
|
"grad_norm": 37.519798278808594,
|
||||||
|
"learning_rate": 1.7193398003386514e-05,
|
||||||
|
"loss": 3.381870985031128,
|
||||||
|
"mean_token_accuracy": 0.3333333432674408,
|
||||||
|
"num_tokens": 202.0,
|
||||||
|
"step": 21
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.61383056640625,
|
||||||
|
"epoch": 1.2941176470588236,
|
||||||
|
"grad_norm": 24.117401123046875,
|
||||||
|
"learning_rate": 1.6691306063588583e-05,
|
||||||
|
"loss": 0.9651630520820618,
|
||||||
|
"mean_token_accuracy": 0.75,
|
||||||
|
"num_tokens": 210.0,
|
||||||
|
"step": 22
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.4701128005981445,
|
||||||
|
"epoch": 1.3529411764705883,
|
||||||
|
"grad_norm": 28.767288208007812,
|
||||||
|
"learning_rate": 1.6156614753256583e-05,
|
||||||
|
"loss": 0.8833061456680298,
|
||||||
|
"mean_token_accuracy": 1.0,
|
||||||
|
"num_tokens": 217.0,
|
||||||
|
"step": 23
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.853976726531982,
|
||||||
|
"epoch": 1.4117647058823528,
|
||||||
|
"grad_norm": 41.10981369018555,
|
||||||
|
"learning_rate": 1.5591929034707468e-05,
|
||||||
|
"loss": 3.586022138595581,
|
||||||
|
"mean_token_accuracy": 0.3333333432674408,
|
||||||
|
"num_tokens": 222.0,
|
||||||
|
"step": 24
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entropy": 4.533289909362793,
|
||||||
|
"epoch": 1.4705882352941178,
|
||||||
|
"grad_norm": Infinity,
|
||||||
|
"learning_rate": 1.5000000000000002e-05,
|
||||||
|
"loss": 2.87251877784729,
|
||||||
|
"mean_token_accuracy": 0.5,
|
||||||
|
"num_tokens": 229.0,
|
||||||
|
"step": 25
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"logging_steps": 1,
|
||||||
|
"max_steps": 50,
|
||||||
|
"num_input_tokens_seen": 0,
|
||||||
|
"num_train_epochs": 3,
|
||||||
|
"save_steps": 25,
|
||||||
|
"stateful_callbacks": {
|
||||||
|
"TrainerControl": {
|
||||||
|
"args": {
|
||||||
|
"should_epoch_stop": false,
|
||||||
|
"should_evaluate": false,
|
||||||
|
"should_log": false,
|
||||||
|
"should_save": true,
|
||||||
|
"should_training_stop": false
|
||||||
|
},
|
||||||
|
"attributes": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"total_flos": 145923548544.0,
|
||||||
|
"train_batch_size": 1,
|
||||||
|
"trial_name": null,
|
||||||
|
"trial_params": null
|
||||||
|
}
|
||||||
3
last-checkpoint/training_args.bin
Normal file
3
last-checkpoint/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e4c5333d0f38134cb6765a79bd8460d792c18fea15a8621e114e7d25c2deabce
|
||||||
|
size 5368
|
||||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:46449eca4f765a9ea030700fe2088d993e5c32a265f8954379b815e814f56b4d
|
||||||
|
size 538090408
|
||||||
244965
tokenizer.json
Normal file
244965
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
34
tokenizer_config.json
Normal file
34
tokenizer_config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"add_prefix_space": false,
|
||||||
|
"backend": "tokenizers",
|
||||||
|
"bos_token": "<|endoftext|>",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "<|endoftext|>",
|
||||||
|
"errors": "replace",
|
||||||
|
"extra_special_tokens": [
|
||||||
|
"<|endoftext|>",
|
||||||
|
"<|im_start|>",
|
||||||
|
"<|im_end|>",
|
||||||
|
"<repo_name>",
|
||||||
|
"<reponame>",
|
||||||
|
"<file_sep>",
|
||||||
|
"<filename>",
|
||||||
|
"<gh_stars>",
|
||||||
|
"<issue_start>",
|
||||||
|
"<issue_comment>",
|
||||||
|
"<issue_closed>",
|
||||||
|
"<jupyter_start>",
|
||||||
|
"<jupyter_text>",
|
||||||
|
"<jupyter_code>",
|
||||||
|
"<jupyter_output>",
|
||||||
|
"<jupyter_script>",
|
||||||
|
"<empty_output>"
|
||||||
|
],
|
||||||
|
"is_local": false,
|
||||||
|
"local_files_only": false,
|
||||||
|
"model_max_length": 8192,
|
||||||
|
"pad_token": "<|endoftext|>",
|
||||||
|
"tokenizer_class": "GPT2Tokenizer",
|
||||||
|
"unk_token": "<|endoftext|>",
|
||||||
|
"vocab_size": 49152
|
||||||
|
}
|
||||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e4c5333d0f38134cb6765a79bd8460d792c18fea15a8621e114e7d25c2deabce
|
||||||
|
size 5368
|
||||||
Reference in New Issue
Block a user