初始化项目,由ModelHub XC社区提供模型
Model: fpadovani/eus-latn-10mb-10mb_seed3407 Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
58
README.md
Normal file
58
README.md
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
---
|
||||||
|
base_model: goldfish-models/eus_latn_10mb
|
||||||
|
library_name: transformers
|
||||||
|
model_name: eus-latn-10mb-10mb_seed3407
|
||||||
|
tags:
|
||||||
|
- generated_from_trainer
|
||||||
|
- trl
|
||||||
|
- sft
|
||||||
|
licence: license
|
||||||
|
---
|
||||||
|
|
||||||
|
# Model Card for eus-latn-10mb-10mb_seed3407
|
||||||
|
|
||||||
|
This model is a fine-tuned version of [goldfish-models/eus_latn_10mb](https://huggingface.co/goldfish-models/eus_latn_10mb).
|
||||||
|
It has been trained using [TRL](https://github.com/huggingface/trl).
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
```python
|
||||||
|
from transformers import pipeline
|
||||||
|
|
||||||
|
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
||||||
|
generator = pipeline("text-generation", model="fpadovani/eus-latn-10mb-10mb_seed3407", device="cuda")
|
||||||
|
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
||||||
|
print(output["generated_text"])
|
||||||
|
```
|
||||||
|
|
||||||
|
## Training procedure
|
||||||
|
|
||||||
|
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/f-padovani-university-of-groningen/new_tokenizers/runs/75629565)
|
||||||
|
|
||||||
|
|
||||||
|
This model was trained with SFT.
|
||||||
|
|
||||||
|
### Framework versions
|
||||||
|
|
||||||
|
- TRL: 0.23.0
|
||||||
|
- Transformers: 4.56.2
|
||||||
|
- Pytorch: 2.11.0
|
||||||
|
- Datasets: 4.8.4
|
||||||
|
- Tokenizers: 0.22.1
|
||||||
|
|
||||||
|
## Citations
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Cite TRL as:
|
||||||
|
|
||||||
|
```bibtex
|
||||||
|
@misc{vonwerra2022trl,
|
||||||
|
title = {{TRL: Transformer Reinforcement Learning}},
|
||||||
|
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
|
||||||
|
year = 2020,
|
||||||
|
journal = {GitHub repository},
|
||||||
|
publisher = {GitHub},
|
||||||
|
howpublished = {\url{https://github.com/huggingface/trl}}
|
||||||
|
}
|
||||||
|
```
|
||||||
1202
added_tokens.json
Normal file
1202
added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
1202
checkpoint-1000/added_tokens.json
Normal file
1202
checkpoint-1000/added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
34
checkpoint-1000/config.json
Normal file
34
checkpoint-1000/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50001,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 512,
|
||||||
|
"n_embd": 512,
|
||||||
|
"n_head": 8,
|
||||||
|
"n_inner": 2048,
|
||||||
|
"n_layer": 4,
|
||||||
|
"n_positions": 512,
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"prefix": "[CLS]",
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 51200
|
||||||
|
}
|
||||||
9
checkpoint-1000/generation_config.json
Normal file
9
checkpoint-1000/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"eos_token_id": [
|
||||||
|
50001
|
||||||
|
],
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"transformers_version": "4.56.2"
|
||||||
|
}
|
||||||
3
checkpoint-1000/model.safetensors
Normal file
3
checkpoint-1000/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:cc71f53de9b8fe6f8f930d663c89b77842eef7196d3442e9bb45d7abc2aa0101
|
||||||
|
size 78179408
|
||||||
3
checkpoint-1000/rng_state.pth
Normal file
3
checkpoint-1000/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:63ddbc91ef021fd4369090eb91c709d6afda7b32b5c88b5e124970fb2606c6ed
|
||||||
|
size 14645
|
||||||
1249
checkpoint-1000/special_tokens_map.json
Normal file
1249
checkpoint-1000/special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-1000/spiece.model
Normal file
3
checkpoint-1000/spiece.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
|
||||||
|
size 1173726
|
||||||
10825
checkpoint-1000/tokenizer_config.json
Normal file
10825
checkpoint-1000/tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
2056
checkpoint-1000/trainer_state.json
Normal file
2056
checkpoint-1000/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-1000/training_args.bin
Normal file
3
checkpoint-1000/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
|
||||||
|
size 6289
|
||||||
1202
checkpoint-10000/added_tokens.json
Normal file
1202
checkpoint-10000/added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
34
checkpoint-10000/config.json
Normal file
34
checkpoint-10000/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50001,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 512,
|
||||||
|
"n_embd": 512,
|
||||||
|
"n_head": 8,
|
||||||
|
"n_inner": 2048,
|
||||||
|
"n_layer": 4,
|
||||||
|
"n_positions": 512,
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"prefix": "[CLS]",
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 51200
|
||||||
|
}
|
||||||
9
checkpoint-10000/generation_config.json
Normal file
9
checkpoint-10000/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"eos_token_id": [
|
||||||
|
50001
|
||||||
|
],
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"transformers_version": "4.56.2"
|
||||||
|
}
|
||||||
3
checkpoint-10000/model.safetensors
Normal file
3
checkpoint-10000/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d0d7db4e722638d66f452fbea2bfa69af198af027eb7786b1c1096a06c7ff78f
|
||||||
|
size 78179408
|
||||||
3
checkpoint-10000/rng_state.pth
Normal file
3
checkpoint-10000/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:91c79d77edb42399fd12fb181da105564c50498cb0ad4f79a3d8f9e7fd7d8796
|
||||||
|
size 14645
|
||||||
1249
checkpoint-10000/special_tokens_map.json
Normal file
1249
checkpoint-10000/special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-10000/spiece.model
Normal file
3
checkpoint-10000/spiece.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
|
||||||
|
size 1173726
|
||||||
10825
checkpoint-10000/tokenizer_config.json
Normal file
10825
checkpoint-10000/tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
20254
checkpoint-10000/trainer_state.json
Normal file
20254
checkpoint-10000/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-10000/training_args.bin
Normal file
3
checkpoint-10000/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
|
||||||
|
size 6289
|
||||||
1202
checkpoint-10500/added_tokens.json
Normal file
1202
checkpoint-10500/added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
34
checkpoint-10500/config.json
Normal file
34
checkpoint-10500/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50001,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 512,
|
||||||
|
"n_embd": 512,
|
||||||
|
"n_head": 8,
|
||||||
|
"n_inner": 2048,
|
||||||
|
"n_layer": 4,
|
||||||
|
"n_positions": 512,
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"prefix": "[CLS]",
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 51200
|
||||||
|
}
|
||||||
9
checkpoint-10500/generation_config.json
Normal file
9
checkpoint-10500/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"eos_token_id": [
|
||||||
|
50001
|
||||||
|
],
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"transformers_version": "4.56.2"
|
||||||
|
}
|
||||||
3
checkpoint-10500/model.safetensors
Normal file
3
checkpoint-10500/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:1b874cecff50a2cfdb9017182f7af4e8bd502708becd86211af96a8943c819aa
|
||||||
|
size 78179408
|
||||||
3
checkpoint-10500/rng_state.pth
Normal file
3
checkpoint-10500/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:5ac8dc317795b69795969ffd0071fd2f06944d60fcce944e67a19a46c3e9d988
|
||||||
|
size 14645
|
||||||
1249
checkpoint-10500/special_tokens_map.json
Normal file
1249
checkpoint-10500/special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-10500/spiece.model
Normal file
3
checkpoint-10500/spiece.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
|
||||||
|
size 1173726
|
||||||
10825
checkpoint-10500/tokenizer_config.json
Normal file
10825
checkpoint-10500/tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
21265
checkpoint-10500/trainer_state.json
Normal file
21265
checkpoint-10500/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-10500/training_args.bin
Normal file
3
checkpoint-10500/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
|
||||||
|
size 6289
|
||||||
1202
checkpoint-11000/added_tokens.json
Normal file
1202
checkpoint-11000/added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
34
checkpoint-11000/config.json
Normal file
34
checkpoint-11000/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50001,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 512,
|
||||||
|
"n_embd": 512,
|
||||||
|
"n_head": 8,
|
||||||
|
"n_inner": 2048,
|
||||||
|
"n_layer": 4,
|
||||||
|
"n_positions": 512,
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"prefix": "[CLS]",
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 51200
|
||||||
|
}
|
||||||
9
checkpoint-11000/generation_config.json
Normal file
9
checkpoint-11000/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"eos_token_id": [
|
||||||
|
50001
|
||||||
|
],
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"transformers_version": "4.56.2"
|
||||||
|
}
|
||||||
3
checkpoint-11000/model.safetensors
Normal file
3
checkpoint-11000/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f8f6c5d4c1018ed8a3cea7a85f5c433f350d28b6924379f980aa50c96eac2634
|
||||||
|
size 78179408
|
||||||
3
checkpoint-11000/rng_state.pth
Normal file
3
checkpoint-11000/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:88805fb939487e041e93aeb2a6a4368172006ff55ed8f20c040ab5b4160d328f
|
||||||
|
size 14645
|
||||||
1249
checkpoint-11000/special_tokens_map.json
Normal file
1249
checkpoint-11000/special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-11000/spiece.model
Normal file
3
checkpoint-11000/spiece.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
|
||||||
|
size 1173726
|
||||||
10825
checkpoint-11000/tokenizer_config.json
Normal file
10825
checkpoint-11000/tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
22276
checkpoint-11000/trainer_state.json
Normal file
22276
checkpoint-11000/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-11000/training_args.bin
Normal file
3
checkpoint-11000/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
|
||||||
|
size 6289
|
||||||
1202
checkpoint-11500/added_tokens.json
Normal file
1202
checkpoint-11500/added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
34
checkpoint-11500/config.json
Normal file
34
checkpoint-11500/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50001,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 512,
|
||||||
|
"n_embd": 512,
|
||||||
|
"n_head": 8,
|
||||||
|
"n_inner": 2048,
|
||||||
|
"n_layer": 4,
|
||||||
|
"n_positions": 512,
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"prefix": "[CLS]",
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 51200
|
||||||
|
}
|
||||||
9
checkpoint-11500/generation_config.json
Normal file
9
checkpoint-11500/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"eos_token_id": [
|
||||||
|
50001
|
||||||
|
],
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"transformers_version": "4.56.2"
|
||||||
|
}
|
||||||
3
checkpoint-11500/model.safetensors
Normal file
3
checkpoint-11500/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:74a7332251e0ee8566876836be3a816aa66bbb5796642491f7c4e6a57be7f0b5
|
||||||
|
size 78179408
|
||||||
3
checkpoint-11500/rng_state.pth
Normal file
3
checkpoint-11500/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:c8754e0e099e04a57cb2bf4021162d2050e7ff5004e5cbd80cf3ae1dccabc889
|
||||||
|
size 14645
|
||||||
1249
checkpoint-11500/special_tokens_map.json
Normal file
1249
checkpoint-11500/special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-11500/spiece.model
Normal file
3
checkpoint-11500/spiece.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
|
||||||
|
size 1173726
|
||||||
10825
checkpoint-11500/tokenizer_config.json
Normal file
10825
checkpoint-11500/tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
23287
checkpoint-11500/trainer_state.json
Normal file
23287
checkpoint-11500/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-11500/training_args.bin
Normal file
3
checkpoint-11500/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
|
||||||
|
size 6289
|
||||||
1202
checkpoint-11630/added_tokens.json
Normal file
1202
checkpoint-11630/added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
34
checkpoint-11630/config.json
Normal file
34
checkpoint-11630/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50001,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 512,
|
||||||
|
"n_embd": 512,
|
||||||
|
"n_head": 8,
|
||||||
|
"n_inner": 2048,
|
||||||
|
"n_layer": 4,
|
||||||
|
"n_positions": 512,
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"prefix": "[CLS]",
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 51200
|
||||||
|
}
|
||||||
9
checkpoint-11630/generation_config.json
Normal file
9
checkpoint-11630/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"eos_token_id": [
|
||||||
|
50001
|
||||||
|
],
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"transformers_version": "4.56.2"
|
||||||
|
}
|
||||||
3
checkpoint-11630/model.safetensors
Normal file
3
checkpoint-11630/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d86d3b00f9f1685400abcad9f13c33d22b84ba0052ba0a8853d246e951c5ca71
|
||||||
|
size 78179408
|
||||||
3
checkpoint-11630/rng_state.pth
Normal file
3
checkpoint-11630/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:807f45588ec412257d63aab42cc1c2684cfd93d63c1a3b4612a169b217f168e0
|
||||||
|
size 14645
|
||||||
1249
checkpoint-11630/special_tokens_map.json
Normal file
1249
checkpoint-11630/special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-11630/spiece.model
Normal file
3
checkpoint-11630/spiece.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
|
||||||
|
size 1173726
|
||||||
10825
checkpoint-11630/tokenizer_config.json
Normal file
10825
checkpoint-11630/tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
23547
checkpoint-11630/trainer_state.json
Normal file
23547
checkpoint-11630/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-11630/training_args.bin
Normal file
3
checkpoint-11630/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
|
||||||
|
size 6289
|
||||||
1202
checkpoint-1500/added_tokens.json
Normal file
1202
checkpoint-1500/added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
34
checkpoint-1500/config.json
Normal file
34
checkpoint-1500/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50001,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 512,
|
||||||
|
"n_embd": 512,
|
||||||
|
"n_head": 8,
|
||||||
|
"n_inner": 2048,
|
||||||
|
"n_layer": 4,
|
||||||
|
"n_positions": 512,
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"prefix": "[CLS]",
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 51200
|
||||||
|
}
|
||||||
9
checkpoint-1500/generation_config.json
Normal file
9
checkpoint-1500/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"eos_token_id": [
|
||||||
|
50001
|
||||||
|
],
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"transformers_version": "4.56.2"
|
||||||
|
}
|
||||||
3
checkpoint-1500/model.safetensors
Normal file
3
checkpoint-1500/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:fff243d912e6f4e2968f942b3abe0cc7f53b66c83a72e7f9d70a8d5390cd5447
|
||||||
|
size 78179408
|
||||||
3
checkpoint-1500/rng_state.pth
Normal file
3
checkpoint-1500/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:905c030bc6e071225bb57d91bdc792eff50465f52b699b63806672b96f2f0302
|
||||||
|
size 14645
|
||||||
1249
checkpoint-1500/special_tokens_map.json
Normal file
1249
checkpoint-1500/special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-1500/spiece.model
Normal file
3
checkpoint-1500/spiece.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
|
||||||
|
size 1173726
|
||||||
10825
checkpoint-1500/tokenizer_config.json
Normal file
10825
checkpoint-1500/tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
3067
checkpoint-1500/trainer_state.json
Normal file
3067
checkpoint-1500/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-1500/training_args.bin
Normal file
3
checkpoint-1500/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
|
||||||
|
size 6289
|
||||||
1202
checkpoint-2000/added_tokens.json
Normal file
1202
checkpoint-2000/added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
34
checkpoint-2000/config.json
Normal file
34
checkpoint-2000/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50001,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 512,
|
||||||
|
"n_embd": 512,
|
||||||
|
"n_head": 8,
|
||||||
|
"n_inner": 2048,
|
||||||
|
"n_layer": 4,
|
||||||
|
"n_positions": 512,
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"prefix": "[CLS]",
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 51200
|
||||||
|
}
|
||||||
9
checkpoint-2000/generation_config.json
Normal file
9
checkpoint-2000/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"eos_token_id": [
|
||||||
|
50001
|
||||||
|
],
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"transformers_version": "4.56.2"
|
||||||
|
}
|
||||||
3
checkpoint-2000/model.safetensors
Normal file
3
checkpoint-2000/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:154c0606f6669f5019126ebd3063deef9315e9dc66acb35c05ea09b9e58c22c5
|
||||||
|
size 78179408
|
||||||
3
checkpoint-2000/rng_state.pth
Normal file
3
checkpoint-2000/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:4eecf20162eff697c379d919d84a9480092a65b10234b3f045ed3d42f40f2c1c
|
||||||
|
size 14645
|
||||||
1249
checkpoint-2000/special_tokens_map.json
Normal file
1249
checkpoint-2000/special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-2000/spiece.model
Normal file
3
checkpoint-2000/spiece.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
|
||||||
|
size 1173726
|
||||||
10825
checkpoint-2000/tokenizer_config.json
Normal file
10825
checkpoint-2000/tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
4078
checkpoint-2000/trainer_state.json
Normal file
4078
checkpoint-2000/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-2000/training_args.bin
Normal file
3
checkpoint-2000/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
|
||||||
|
size 6289
|
||||||
1202
checkpoint-2500/added_tokens.json
Normal file
1202
checkpoint-2500/added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
34
checkpoint-2500/config.json
Normal file
34
checkpoint-2500/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50001,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 512,
|
||||||
|
"n_embd": 512,
|
||||||
|
"n_head": 8,
|
||||||
|
"n_inner": 2048,
|
||||||
|
"n_layer": 4,
|
||||||
|
"n_positions": 512,
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"prefix": "[CLS]",
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 51200
|
||||||
|
}
|
||||||
9
checkpoint-2500/generation_config.json
Normal file
9
checkpoint-2500/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"eos_token_id": [
|
||||||
|
50001
|
||||||
|
],
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"transformers_version": "4.56.2"
|
||||||
|
}
|
||||||
3
checkpoint-2500/model.safetensors
Normal file
3
checkpoint-2500/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:b496df9323d0c6b2c8c79abeb72ab71f8c4e2494bd652fca401b15011e9e8a63
|
||||||
|
size 78179408
|
||||||
3
checkpoint-2500/rng_state.pth
Normal file
3
checkpoint-2500/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:4ea50940afbf0e6b6b21a57c767494477b82e3b9545f16a75c2f2c745410397f
|
||||||
|
size 14645
|
||||||
1249
checkpoint-2500/special_tokens_map.json
Normal file
1249
checkpoint-2500/special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-2500/spiece.model
Normal file
3
checkpoint-2500/spiece.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
|
||||||
|
size 1173726
|
||||||
10825
checkpoint-2500/tokenizer_config.json
Normal file
10825
checkpoint-2500/tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
5089
checkpoint-2500/trainer_state.json
Normal file
5089
checkpoint-2500/trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-2500/training_args.bin
Normal file
3
checkpoint-2500/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
|
||||||
|
size 6289
|
||||||
1202
checkpoint-3000/added_tokens.json
Normal file
1202
checkpoint-3000/added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
34
checkpoint-3000/config.json
Normal file
34
checkpoint-3000/config.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50001,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 512,
|
||||||
|
"n_embd": 512,
|
||||||
|
"n_head": 8,
|
||||||
|
"n_inner": 2048,
|
||||||
|
"n_layer": 4,
|
||||||
|
"n_positions": 512,
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"prefix": "[CLS]",
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 51200
|
||||||
|
}
|
||||||
9
checkpoint-3000/generation_config.json
Normal file
9
checkpoint-3000/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50000,
|
||||||
|
"eos_token_id": [
|
||||||
|
50001
|
||||||
|
],
|
||||||
|
"pad_token_id": 50002,
|
||||||
|
"transformers_version": "4.56.2"
|
||||||
|
}
|
||||||
3
checkpoint-3000/model.safetensors
Normal file
3
checkpoint-3000/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:58ce939eaf17acb88e8c65afb6a12d7a218f8f14610fb4197a249478195171a5
|
||||||
|
size 78179408
|
||||||
3
checkpoint-3000/rng_state.pth
Normal file
3
checkpoint-3000/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d91939316df6135915f1d11ef0b4ed34f661805515193d9577e2e41a713a5eb9
|
||||||
|
size 14645
|
||||||
1249
checkpoint-3000/special_tokens_map.json
Normal file
1249
checkpoint-3000/special_tokens_map.json
Normal file
File diff suppressed because it is too large
Load Diff
3
checkpoint-3000/spiece.model
Normal file
3
checkpoint-3000/spiece.model
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
|
||||||
|
size 1173726
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user