初始化项目，由ModelHub XC社区提供模型

Model: fpadovani/eus-latn-10mb-10mb_seed3407 Source: Original Platform
2026-06-28 05:31:17 +08:00
commit 213141ef9f
250 changed files with 636730 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,35 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,58 @@
 ---
 base_model: goldfish-models/eus_latn_10mb
 library_name: transformers
 model_name: eus-latn-10mb-10mb_seed3407
 tags:
 - generated_from_trainer
 - trl
 - sft
 licence: license
 ---
 # Model Card for eus-latn-10mb-10mb_seed3407
 This model is a fine-tuned version of [goldfish-models/eus_latn_10mb](https://huggingface.co/goldfish-models/eus_latn_10mb).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ```python
 from transformers import pipeline
 question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
 generator = pipeline("text-generation", model="fpadovani/eus-latn-10mb-10mb_seed3407", device="cuda")
 output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
 print(output["generated_text"])
 ```
 ## Training procedure
 [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/f-padovani-university-of-groningen/new_tokenizers/runs/75629565) 
 This model was trained with SFT.
 ### Framework versions
 - TRL: 0.23.0
 - Transformers: 4.56.2
 - PyTorch: 2.11.0
 - Datasets: 4.8.4
 - Tokenizers: 0.22.1
 ## Citations
 Cite TRL as:
 ```bibtex
 @misc{vonwerra2022trl,
 	title        = {{TRL: Transformer Reinforcement Learning}},
 	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
 	year         = 2020,
 	journal      = {GitHub repository},
 	publisher    = {GitHub},
 	howpublished = {\url{https://github.com/huggingface/trl}}
 }
 ```
--- a/added_tokens.json
+++ b/added_tokens.json
--- a/checkpoint-1000/added_tokens.json
+++ b/checkpoint-1000/added_tokens.json
--- a/checkpoint-1000/config.json
+++ b/checkpoint-1000/config.json
@@ -0,0 +1,34 @@
 {
  "activation_function": "gelu",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50000,
  "dtype": "bfloat16",
  "embd_pdrop": 0.1,
  "eos_token_id": 50001,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 512,
  "n_head": 8,
  "n_inner": 2048,
  "n_layer": 4,
  "n_positions": 512,
  "pad_token_id": 50002,
  "prefix": "[CLS]",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 51200
 }
--- a/checkpoint-1000/generation_config.json
+++ b/checkpoint-1000/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 50000,
  "eos_token_id": [
    50001
  ],
  "pad_token_id": 50002,
  "transformers_version": "4.56.2"
 }
--- a/checkpoint-1000/model.safetensors
+++ b/checkpoint-1000/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:cc71f53de9b8fe6f8f930d663c89b77842eef7196d3442e9bb45d7abc2aa0101
 size 78179408
--- a/checkpoint-1000/rng_state.pth
+++ b/checkpoint-1000/rng_state.pth
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:63ddbc91ef021fd4369090eb91c709d6afda7b32b5c88b5e124970fb2606c6ed
 size 14645
--- a/checkpoint-1000/special_tokens_map.json
+++ b/checkpoint-1000/special_tokens_map.json
--- a/checkpoint-1000/spiece.model
+++ b/checkpoint-1000/spiece.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
 size 1173726
--- a/checkpoint-1000/tokenizer_config.json
+++ b/checkpoint-1000/tokenizer_config.json
--- a/checkpoint-1000/trainer_state.json
+++ b/checkpoint-1000/trainer_state.json
--- a/checkpoint-1000/training_args.bin
+++ b/checkpoint-1000/training_args.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
 size 6289
--- a/checkpoint-10000/added_tokens.json
+++ b/checkpoint-10000/added_tokens.json
--- a/checkpoint-10000/config.json
+++ b/checkpoint-10000/config.json
@@ -0,0 +1,34 @@
 {
  "activation_function": "gelu",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50000,
  "dtype": "bfloat16",
  "embd_pdrop": 0.1,
  "eos_token_id": 50001,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 512,
  "n_head": 8,
  "n_inner": 2048,
  "n_layer": 4,
  "n_positions": 512,
  "pad_token_id": 50002,
  "prefix": "[CLS]",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 51200
 }
--- a/checkpoint-10000/generation_config.json
+++ b/checkpoint-10000/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 50000,
  "eos_token_id": [
    50001
  ],
  "pad_token_id": 50002,
  "transformers_version": "4.56.2"
 }
--- a/checkpoint-10000/model.safetensors
+++ b/checkpoint-10000/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:d0d7db4e722638d66f452fbea2bfa69af198af027eb7786b1c1096a06c7ff78f
 size 78179408
--- a/checkpoint-10000/rng_state.pth
+++ b/checkpoint-10000/rng_state.pth
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:91c79d77edb42399fd12fb181da105564c50498cb0ad4f79a3d8f9e7fd7d8796
 size 14645
--- a/checkpoint-10000/special_tokens_map.json
+++ b/checkpoint-10000/special_tokens_map.json
--- a/checkpoint-10000/spiece.model
+++ b/checkpoint-10000/spiece.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
 size 1173726
--- a/checkpoint-10000/tokenizer_config.json
+++ b/checkpoint-10000/tokenizer_config.json
--- a/checkpoint-10000/trainer_state.json
+++ b/checkpoint-10000/trainer_state.json
--- a/checkpoint-10000/training_args.bin
+++ b/checkpoint-10000/training_args.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
 size 6289
--- a/checkpoint-10500/added_tokens.json
+++ b/checkpoint-10500/added_tokens.json
--- a/checkpoint-10500/config.json
+++ b/checkpoint-10500/config.json
@@ -0,0 +1,34 @@
 {
  "activation_function": "gelu",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50000,
  "dtype": "bfloat16",
  "embd_pdrop": 0.1,
  "eos_token_id": 50001,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 512,
  "n_head": 8,
  "n_inner": 2048,
  "n_layer": 4,
  "n_positions": 512,
  "pad_token_id": 50002,
  "prefix": "[CLS]",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 51200
 }
--- a/checkpoint-10500/generation_config.json
+++ b/checkpoint-10500/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 50000,
  "eos_token_id": [
    50001
  ],
  "pad_token_id": 50002,
  "transformers_version": "4.56.2"
 }
--- a/checkpoint-10500/model.safetensors
+++ b/checkpoint-10500/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:1b874cecff50a2cfdb9017182f7af4e8bd502708becd86211af96a8943c819aa
 size 78179408
--- a/checkpoint-10500/rng_state.pth
+++ b/checkpoint-10500/rng_state.pth
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:5ac8dc317795b69795969ffd0071fd2f06944d60fcce944e67a19a46c3e9d988
 size 14645
--- a/checkpoint-10500/special_tokens_map.json
+++ b/checkpoint-10500/special_tokens_map.json
--- a/checkpoint-10500/spiece.model
+++ b/checkpoint-10500/spiece.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
 size 1173726
--- a/checkpoint-10500/tokenizer_config.json
+++ b/checkpoint-10500/tokenizer_config.json
--- a/checkpoint-10500/trainer_state.json
+++ b/checkpoint-10500/trainer_state.json
--- a/checkpoint-10500/training_args.bin
+++ b/checkpoint-10500/training_args.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
 size 6289
--- a/checkpoint-11000/added_tokens.json
+++ b/checkpoint-11000/added_tokens.json
--- a/checkpoint-11000/config.json
+++ b/checkpoint-11000/config.json
@@ -0,0 +1,34 @@
 {
  "activation_function": "gelu",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50000,
  "dtype": "bfloat16",
  "embd_pdrop": 0.1,
  "eos_token_id": 50001,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 512,
  "n_head": 8,
  "n_inner": 2048,
  "n_layer": 4,
  "n_positions": 512,
  "pad_token_id": 50002,
  "prefix": "[CLS]",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 51200
 }
--- a/checkpoint-11000/generation_config.json
+++ b/checkpoint-11000/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 50000,
  "eos_token_id": [
    50001
  ],
  "pad_token_id": 50002,
  "transformers_version": "4.56.2"
 }
--- a/checkpoint-11000/model.safetensors
+++ b/checkpoint-11000/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f8f6c5d4c1018ed8a3cea7a85f5c433f350d28b6924379f980aa50c96eac2634
 size 78179408
--- a/checkpoint-11000/rng_state.pth
+++ b/checkpoint-11000/rng_state.pth
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:88805fb939487e041e93aeb2a6a4368172006ff55ed8f20c040ab5b4160d328f
 size 14645
--- a/checkpoint-11000/special_tokens_map.json
+++ b/checkpoint-11000/special_tokens_map.json
--- a/checkpoint-11000/spiece.model
+++ b/checkpoint-11000/spiece.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
 size 1173726
--- a/checkpoint-11000/tokenizer_config.json
+++ b/checkpoint-11000/tokenizer_config.json
--- a/checkpoint-11000/trainer_state.json
+++ b/checkpoint-11000/trainer_state.json
--- a/checkpoint-11000/training_args.bin
+++ b/checkpoint-11000/training_args.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
 size 6289
--- a/checkpoint-11500/added_tokens.json
+++ b/checkpoint-11500/added_tokens.json
--- a/checkpoint-11500/config.json
+++ b/checkpoint-11500/config.json
@@ -0,0 +1,34 @@
 {
  "activation_function": "gelu",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50000,
  "dtype": "bfloat16",
  "embd_pdrop": 0.1,
  "eos_token_id": 50001,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 512,
  "n_head": 8,
  "n_inner": 2048,
  "n_layer": 4,
  "n_positions": 512,
  "pad_token_id": 50002,
  "prefix": "[CLS]",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 51200
 }
--- a/checkpoint-11500/generation_config.json
+++ b/checkpoint-11500/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 50000,
  "eos_token_id": [
    50001
  ],
  "pad_token_id": 50002,
  "transformers_version": "4.56.2"
 }
--- a/checkpoint-11500/model.safetensors
+++ b/checkpoint-11500/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:74a7332251e0ee8566876836be3a816aa66bbb5796642491f7c4e6a57be7f0b5
 size 78179408
--- a/checkpoint-11500/rng_state.pth
+++ b/checkpoint-11500/rng_state.pth
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:c8754e0e099e04a57cb2bf4021162d2050e7ff5004e5cbd80cf3ae1dccabc889
 size 14645
--- a/checkpoint-11500/special_tokens_map.json
+++ b/checkpoint-11500/special_tokens_map.json
--- a/checkpoint-11500/spiece.model
+++ b/checkpoint-11500/spiece.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
 size 1173726
--- a/checkpoint-11500/tokenizer_config.json
+++ b/checkpoint-11500/tokenizer_config.json
--- a/checkpoint-11500/trainer_state.json
+++ b/checkpoint-11500/trainer_state.json
--- a/checkpoint-11500/training_args.bin
+++ b/checkpoint-11500/training_args.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
 size 6289
--- a/checkpoint-11630/added_tokens.json
+++ b/checkpoint-11630/added_tokens.json
--- a/checkpoint-11630/config.json
+++ b/checkpoint-11630/config.json
@@ -0,0 +1,34 @@
 {
  "activation_function": "gelu",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50000,
  "dtype": "bfloat16",
  "embd_pdrop": 0.1,
  "eos_token_id": 50001,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 512,
  "n_head": 8,
  "n_inner": 2048,
  "n_layer": 4,
  "n_positions": 512,
  "pad_token_id": 50002,
  "prefix": "[CLS]",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 51200
 }
--- a/checkpoint-11630/generation_config.json
+++ b/checkpoint-11630/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 50000,
  "eos_token_id": [
    50001
  ],
  "pad_token_id": 50002,
  "transformers_version": "4.56.2"
 }
--- a/checkpoint-11630/model.safetensors
+++ b/checkpoint-11630/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:d86d3b00f9f1685400abcad9f13c33d22b84ba0052ba0a8853d246e951c5ca71
 size 78179408
--- a/checkpoint-11630/rng_state.pth
+++ b/checkpoint-11630/rng_state.pth
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:807f45588ec412257d63aab42cc1c2684cfd93d63c1a3b4612a169b217f168e0
 size 14645
--- a/checkpoint-11630/special_tokens_map.json
+++ b/checkpoint-11630/special_tokens_map.json
--- a/checkpoint-11630/spiece.model
+++ b/checkpoint-11630/spiece.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
 size 1173726
--- a/checkpoint-11630/tokenizer_config.json
+++ b/checkpoint-11630/tokenizer_config.json
--- a/checkpoint-11630/trainer_state.json
+++ b/checkpoint-11630/trainer_state.json
--- a/checkpoint-11630/training_args.bin
+++ b/checkpoint-11630/training_args.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
 size 6289
--- a/checkpoint-1500/added_tokens.json
+++ b/checkpoint-1500/added_tokens.json
--- a/checkpoint-1500/config.json
+++ b/checkpoint-1500/config.json
@@ -0,0 +1,34 @@
 {
  "activation_function": "gelu",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50000,
  "dtype": "bfloat16",
  "embd_pdrop": 0.1,
  "eos_token_id": 50001,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 512,
  "n_head": 8,
  "n_inner": 2048,
  "n_layer": 4,
  "n_positions": 512,
  "pad_token_id": 50002,
  "prefix": "[CLS]",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 51200
 }
--- a/checkpoint-1500/generation_config.json
+++ b/checkpoint-1500/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 50000,
  "eos_token_id": [
    50001
  ],
  "pad_token_id": 50002,
  "transformers_version": "4.56.2"
 }
--- a/checkpoint-1500/model.safetensors
+++ b/checkpoint-1500/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:fff243d912e6f4e2968f942b3abe0cc7f53b66c83a72e7f9d70a8d5390cd5447
 size 78179408
--- a/checkpoint-1500/rng_state.pth
+++ b/checkpoint-1500/rng_state.pth
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:905c030bc6e071225bb57d91bdc792eff50465f52b699b63806672b96f2f0302
 size 14645
--- a/checkpoint-1500/special_tokens_map.json
+++ b/checkpoint-1500/special_tokens_map.json
--- a/checkpoint-1500/spiece.model
+++ b/checkpoint-1500/spiece.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
 size 1173726
--- a/checkpoint-1500/tokenizer_config.json
+++ b/checkpoint-1500/tokenizer_config.json
--- a/checkpoint-1500/trainer_state.json
+++ b/checkpoint-1500/trainer_state.json
--- a/checkpoint-1500/training_args.bin
+++ b/checkpoint-1500/training_args.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
 size 6289
--- a/checkpoint-2000/added_tokens.json
+++ b/checkpoint-2000/added_tokens.json
--- a/checkpoint-2000/config.json
+++ b/checkpoint-2000/config.json
@@ -0,0 +1,34 @@
 {
  "activation_function": "gelu",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50000,
  "dtype": "bfloat16",
  "embd_pdrop": 0.1,
  "eos_token_id": 50001,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 512,
  "n_head": 8,
  "n_inner": 2048,
  "n_layer": 4,
  "n_positions": 512,
  "pad_token_id": 50002,
  "prefix": "[CLS]",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 51200
 }
--- a/checkpoint-2000/generation_config.json
+++ b/checkpoint-2000/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 50000,
  "eos_token_id": [
    50001
  ],
  "pad_token_id": 50002,
  "transformers_version": "4.56.2"
 }
--- a/checkpoint-2000/model.safetensors
+++ b/checkpoint-2000/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:154c0606f6669f5019126ebd3063deef9315e9dc66acb35c05ea09b9e58c22c5
 size 78179408
--- a/checkpoint-2000/rng_state.pth
+++ b/checkpoint-2000/rng_state.pth
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:4eecf20162eff697c379d919d84a9480092a65b10234b3f045ed3d42f40f2c1c
 size 14645
--- a/checkpoint-2000/special_tokens_map.json
+++ b/checkpoint-2000/special_tokens_map.json
--- a/checkpoint-2000/spiece.model
+++ b/checkpoint-2000/spiece.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
 size 1173726
--- a/checkpoint-2000/tokenizer_config.json
+++ b/checkpoint-2000/tokenizer_config.json
--- a/checkpoint-2000/trainer_state.json
+++ b/checkpoint-2000/trainer_state.json
--- a/checkpoint-2000/training_args.bin
+++ b/checkpoint-2000/training_args.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
 size 6289
--- a/checkpoint-2500/added_tokens.json
+++ b/checkpoint-2500/added_tokens.json
--- a/checkpoint-2500/config.json
+++ b/checkpoint-2500/config.json
@@ -0,0 +1,34 @@
 {
  "activation_function": "gelu",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50000,
  "dtype": "bfloat16",
  "embd_pdrop": 0.1,
  "eos_token_id": 50001,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 512,
  "n_head": 8,
  "n_inner": 2048,
  "n_layer": 4,
  "n_positions": 512,
  "pad_token_id": 50002,
  "prefix": "[CLS]",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 51200
 }
--- a/checkpoint-2500/generation_config.json
+++ b/checkpoint-2500/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 50000,
  "eos_token_id": [
    50001
  ],
  "pad_token_id": 50002,
  "transformers_version": "4.56.2"
 }
--- a/checkpoint-2500/model.safetensors
+++ b/checkpoint-2500/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:b496df9323d0c6b2c8c79abeb72ab71f8c4e2494bd652fca401b15011e9e8a63
 size 78179408
--- a/checkpoint-2500/rng_state.pth
+++ b/checkpoint-2500/rng_state.pth
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:4ea50940afbf0e6b6b21a57c767494477b82e3b9545f16a75c2f2c745410397f
 size 14645
--- a/checkpoint-2500/special_tokens_map.json
+++ b/checkpoint-2500/special_tokens_map.json
--- a/checkpoint-2500/spiece.model
+++ b/checkpoint-2500/spiece.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
 size 1173726
--- a/checkpoint-2500/tokenizer_config.json
+++ b/checkpoint-2500/tokenizer_config.json
--- a/checkpoint-2500/trainer_state.json
+++ b/checkpoint-2500/trainer_state.json
--- a/checkpoint-2500/training_args.bin
+++ b/checkpoint-2500/training_args.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:db5ec319254a2dd7bc234553d7f3ff9fe01f8cb3163c4c7a3744b038dc8a187d
 size 6289
--- a/checkpoint-3000/added_tokens.json
+++ b/checkpoint-3000/added_tokens.json
--- a/checkpoint-3000/config.json
+++ b/checkpoint-3000/config.json
@@ -0,0 +1,34 @@
 {
  "activation_function": "gelu",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50000,
  "dtype": "bfloat16",
  "embd_pdrop": 0.1,
  "eos_token_id": 50001,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 512,
  "n_embd": 512,
  "n_head": 8,
  "n_inner": 2048,
  "n_layer": 4,
  "n_positions": 512,
  "pad_token_id": 50002,
  "prefix": "[CLS]",
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.56.2",
  "use_cache": true,
  "vocab_size": 51200
 }
--- a/checkpoint-3000/generation_config.json
+++ b/checkpoint-3000/generation_config.json
@@ -0,0 +1,9 @@
 {
  "_from_model_config": true,
  "bos_token_id": 50000,
  "eos_token_id": [
    50001
  ],
  "pad_token_id": 50002,
  "transformers_version": "4.56.2"
 }
--- a/checkpoint-3000/model.safetensors
+++ b/checkpoint-3000/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:58ce939eaf17acb88e8c65afb6a12d7a218f8f14610fb4197a249478195171a5
 size 78179408
--- a/checkpoint-3000/rng_state.pth
+++ b/checkpoint-3000/rng_state.pth
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:d91939316df6135915f1d11ef0b4ed34f661805515193d9577e2e41a713a5eb9
 size 14645
--- a/checkpoint-3000/special_tokens_map.json
+++ b/checkpoint-3000/special_tokens_map.json
--- a/checkpoint-3000/spiece.model
+++ b/checkpoint-3000/spiece.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:f712559c1ce506dfb7aa691ca770055d2170d3339aa7ee2e0e463181c2fe1e01
 size 1173726
--- a/Show More
+++ b/Show More