初始化项目,由ModelHub XC社区提供模型
Model: fpadovani/candor_np_13 Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
step_*
|
||||||
|
epoch_*
|
||||||
77
README.md
Normal file
77
README.md
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
---
|
||||||
|
library_name: transformers
|
||||||
|
tags:
|
||||||
|
- generated_from_trainer
|
||||||
|
model-index:
|
||||||
|
- name: candor_np_13
|
||||||
|
results: []
|
||||||
|
---
|
||||||
|
|
||||||
|
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||||
|
should probably proofread and complete it, then remove this comment. -->
|
||||||
|
|
||||||
|
# candor_np_13
|
||||||
|
|
||||||
|
This model is a fine-tuned version of an unspecified base model on an unknown dataset.
|
||||||
|
It achieves the following results on the evaluation set:
|
||||||
|
- Loss: 4.4624
|
||||||
|
|
||||||
|
## Model description
|
||||||
|
|
||||||
|
More information needed
|
||||||
|
|
||||||
|
## Intended uses & limitations
|
||||||
|
|
||||||
|
More information needed
|
||||||
|
|
||||||
|
## Training and evaluation data
|
||||||
|
|
||||||
|
More information needed
|
||||||
|
|
||||||
|
## Training procedure
|
||||||
|
|
||||||
|
### Training hyperparameters
|
||||||
|
|
||||||
|
The following hyperparameters were used during training:
|
||||||
|
- learning_rate: 0.0001
|
||||||
|
- train_batch_size: 256
|
||||||
|
- eval_batch_size: 256
|
||||||
|
- seed: 13
|
||||||
|
- optimizer: adamw_torch_fused with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
|
||||||
|
- lr_scheduler_type: linear
|
||||||
|
- lr_scheduler_warmup_steps: 500
|
||||||
|
- num_epochs: 20
|
||||||
|
- mixed_precision_training: Native AMP
|
||||||
|
|
||||||
|
### Training results
|
||||||
|
|
||||||
|
| Training Loss | Epoch | Step | Validation Loss |
|
||||||
|
|:-------------:|:-----:|:----:|:---------------:|
|
||||||
|
| 5.762 | 1.0 | 422 | 4.7811 |
|
||||||
|
| 4.6298 | 2.0 | 844 | 4.6029 |
|
||||||
|
| 4.5095 | 3.0 | 1266 | 4.5221 |
|
||||||
|
| 4.4388 | 4.0 | 1688 | 4.4752 |
|
||||||
|
| 4.3851 | 5.0 | 2110 | 4.4421 |
|
||||||
|
| 4.3391 | 6.0 | 2532 | 4.4180 |
|
||||||
|
| 4.2975 | 7.0 | 2954 | 4.4000 |
|
||||||
|
| 4.258 | 8.0 | 3376 | 4.3873 |
|
||||||
|
| 4.2193 | 9.0 | 3798 | 4.3806 |
|
||||||
|
| 4.1807 | 10.0 | 4220 | 4.3781 |
|
||||||
|
| 4.1418 | 11.0 | 4642 | 4.3790 |
|
||||||
|
| 4.1029 | 12.0 | 5064 | 4.3839 |
|
||||||
|
| 4.0643 | 13.0 | 5486 | 4.3914 |
|
||||||
|
| 4.0259 | 14.0 | 5908 | 4.4026 |
|
||||||
|
| 3.9896 | 15.0 | 6330 | 4.4131 |
|
||||||
|
| 3.9551 | 16.0 | 6752 | 4.4276 |
|
||||||
|
| 3.9231 | 17.0 | 7174 | 4.4376 |
|
||||||
|
| 3.8953 | 18.0 | 7596 | 4.4503 |
|
||||||
|
| 3.8731 | 19.0 | 8018 | 4.4567 |
|
||||||
|
| 3.8566 | 20.0 | 8440 | 4.4624 |
|
||||||
|
|
||||||
|
|
||||||
|
### Framework versions
|
||||||
|
|
||||||
|
- Transformers 4.56.1
|
||||||
|
- Pytorch 2.8.0+cu128
|
||||||
|
- Datasets 4.0.0
|
||||||
|
- Tokenizers 0.22.0
|
||||||
33
check-1/config.json
Normal file
33
check-1/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-1/generation_config.json
Normal file
9
check-1/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-1/model.safetensors
Normal file
3
check-1/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:90be159ead9ad5456575cd9af029dec01c629295e9685b7addd7bf79bc2dbe01
|
||||||
|
size 435544704
|
||||||
33
check-11/config.json
Normal file
33
check-11/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-11/generation_config.json
Normal file
9
check-11/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-11/model.safetensors
Normal file
3
check-11/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:5b04530045462c6ac9594c7156cc20f165425a3d7111c9bda37a6cb3baa70a8a
|
||||||
|
size 435544704
|
||||||
33
check-116/config.json
Normal file
33
check-116/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-116/generation_config.json
Normal file
9
check-116/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-116/model.safetensors
Normal file
3
check-116/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:25479e07a664c75d2b13aaa8439416a649550683d7e472ae40ef7eff21146cf8
|
||||||
|
size 435544704
|
||||||
33
check-1256/config.json
Normal file
33
check-1256/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-1256/generation_config.json
Normal file
9
check-1256/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-1256/model.safetensors
Normal file
3
check-1256/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e381827460520fceef5a67b2dd68cc7339a59c99c0bce921dd2e200d2c7bba44
|
||||||
|
size 435544704
|
||||||
33
check-17/config.json
Normal file
33
check-17/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-17/generation_config.json
Normal file
9
check-17/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-17/model.safetensors
Normal file
3
check-17/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:6ae7c430d5f6ad834c7360c92e3a5678f09d8811090246b57243c5f2b16ded9a
|
||||||
|
size 435544704
|
||||||
33
check-187/config.json
Normal file
33
check-187/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-187/generation_config.json
Normal file
9
check-187/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-187/model.safetensors
Normal file
3
check-187/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:fdedf0b3f542fb55f9be50d683468e9056e77783b794aa09909800748d7322bf
|
||||||
|
size 435544704
|
||||||
33
check-2/config.json
Normal file
33
check-2/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-2/generation_config.json
Normal file
9
check-2/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-2/model.safetensors
Normal file
3
check-2/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:a8436d252e7a878103e9d3fd3413729572eaec8ff28bc65a6967902031a56262
|
||||||
|
size 435544704
|
||||||
33
check-2021/config.json
Normal file
33
check-2021/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-2021/generation_config.json
Normal file
9
check-2021/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-2021/model.safetensors
Normal file
3
check-2021/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:06a9961e667595378a43e6eb1cdb97acf9241f46fa575429d617963039324569
|
||||||
|
size 435544704
|
||||||
33
check-28/config.json
Normal file
33
check-28/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-28/generation_config.json
Normal file
9
check-28/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-28/model.safetensors
Normal file
3
check-28/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:29518f4613bae1cb854f8e87a0650e807fecab27d893ee2b26c09c0c9b4c904b
|
||||||
|
size 435544704
|
||||||
33
check-3/config.json
Normal file
33
check-3/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-3/generation_config.json
Normal file
9
check-3/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-3/model.safetensors
Normal file
3
check-3/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:20419f996cdde8008404128ff7fefbaae2d47dd4c5d8c6493f78d9120120fc3b
|
||||||
|
size 435544704
|
||||||
33
check-301/config.json
Normal file
33
check-301/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-301/generation_config.json
Normal file
9
check-301/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-301/model.safetensors
Normal file
3
check-301/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:07544fc813561b4b75991b476241437cb8ce4cdc7c18e8188c82e2fc63e7ba39
|
||||||
|
size 435544704
|
||||||
33
check-3252/config.json
Normal file
33
check-3252/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-3252/generation_config.json
Normal file
9
check-3252/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-3252/model.safetensors
Normal file
3
check-3252/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f0e8d1e2699b69661ab20ae3cb479203a133a950b25793603f55c887982dc42b
|
||||||
|
size 435544704
|
||||||
33
check-4/config.json
Normal file
33
check-4/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-4/generation_config.json
Normal file
9
check-4/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-4/model.safetensors
Normal file
3
check-4/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e76b5c472810f0c59a214ade0bbb0e1fb2a1be3115008d17692828d450eb1cb7
|
||||||
|
size 435544704
|
||||||
33
check-45/config.json
Normal file
33
check-45/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-45/generation_config.json
Normal file
9
check-45/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-45/model.safetensors
Normal file
3
check-45/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:c1b95f82eb8d77444a3933a36d207904a8572e2fdadc42eb0b870a90f82f1759
|
||||||
|
size 435544704
|
||||||
33
check-485/config.json
Normal file
33
check-485/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-485/generation_config.json
Normal file
9
check-485/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-485/model.safetensors
Normal file
3
check-485/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:88d9551d87f1ab62db266b3bfca7260dcf4f001d01b3d8e3b80733ef39e6f54b
|
||||||
|
size 435544704
|
||||||
33
check-5233/config.json
Normal file
33
check-5233/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-5233/generation_config.json
Normal file
9
check-5233/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-5233/model.safetensors
Normal file
3
check-5233/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:7761aa9e8094dd3deff9a080fe9ba194e8e1ec1c1f6b837114f9aa653e0f8d1c
|
||||||
|
size 435544704
|
||||||
33
check-7/config.json
Normal file
33
check-7/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-7/generation_config.json
Normal file
9
check-7/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-7/model.safetensors
Normal file
3
check-7/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:25ad341fa3acb8799988cb611229701255cf6693ede901639ee32fefc886dc34
|
||||||
|
size 435544704
|
||||||
33
check-72/config.json
Normal file
33
check-72/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-72/generation_config.json
Normal file
9
check-72/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-72/model.safetensors
Normal file
3
check-72/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:de57b9fe9d0f2296420c761ad8ae93a016192d65011fcccc348bb116ed570bbc
|
||||||
|
size 435544704
|
||||||
33
check-780/config.json
Normal file
33
check-780/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-780/generation_config.json
Normal file
9
check-780/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-780/model.safetensors
Normal file
3
check-780/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:3f2de966bd252f9fbce019da1f69bbd1abf3e4c0e5568a6403fcbcc1f703cdb9
|
||||||
|
size 435544704
|
||||||
33
check-8420/config.json
Normal file
33
check-8420/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
check-8420/generation_config.json
Normal file
9
check-8420/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
check-8420/model.safetensors
Normal file
3
check-8420/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:b71de46bbb72bd6a0c4c532e2f662795551b8e33048779eb84813d5d8fdf59f7
|
||||||
|
size 435544704
|
||||||
33
checkpoint-1000/config.json
Normal file
33
checkpoint-1000/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
checkpoint-1000/generation_config.json
Normal file
9
checkpoint-1000/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
checkpoint-1000/model.safetensors
Normal file
3
checkpoint-1000/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:ef567f1cacb161f08dcae0cf3ee21bff941f3c451cdfb02d1cdf7eab5171af55
|
||||||
|
size 435544704
|
||||||
3
checkpoint-1000/optimizer.pt
Normal file
3
checkpoint-1000/optimizer.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:d8c2f2c1eaa177e17a78f9779bca3c07ed0790d5c4cde97ff295b408fa4bc30f
|
||||||
|
size 871183627
|
||||||
3
checkpoint-1000/rng_state.pth
Normal file
3
checkpoint-1000/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:64e426aac4b359223b6dc58df4f0fcc4e7848580dc21ac6dfe99d9d7b81ad9d6
|
||||||
|
size 14709
|
||||||
3
checkpoint-1000/scaler.pt
Normal file
3
checkpoint-1000/scaler.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:14ae2a2128444abab378aa06c09a61a84665f758fcc19fc46f5789b0bc1b5665
|
||||||
|
size 1383
|
||||||
3
checkpoint-1000/scheduler.pt
Normal file
3
checkpoint-1000/scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:9fe53f9aa94ad340abdfc227d6334173cd3ad0556490859ba978e11019ab04e1
|
||||||
|
size 1465
|
||||||
8
checkpoint-1000/special_tokens_map.json
Normal file
8
checkpoint-1000/special_tokens_map.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"mask_token": "<mask>",
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"sep_token": "<sep>",
|
||||||
|
"unk_token": "<unk>"
|
||||||
|
}
|
||||||
149783
checkpoint-1000/tokenizer.json
Normal file
149783
checkpoint-1000/tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
62
checkpoint-1000/tokenizer_config.json
Normal file
62
checkpoint-1000/tokenizer_config.json
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
{
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<pad>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"3": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"4": {
|
||||||
|
"content": "<mask>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"5": {
|
||||||
|
"content": "<sep>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"extra_special_tokens": {},
|
||||||
|
"mask_token": "<mask>",
|
||||||
|
"model_max_length": 1000000000000000019884624838656,
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"sep_token": "<sep>",
|
||||||
|
"tokenizer_class": "PreTrainedTokenizerFast",
|
||||||
|
"unk_token": "<unk>"
|
||||||
|
}
|
||||||
71
checkpoint-1000/trainer_state.json
Normal file
71
checkpoint-1000/trainer_state.json
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
{
|
||||||
|
"best_global_step": 844,
|
||||||
|
"best_metric": 4.602915287017822,
|
||||||
|
"best_model_checkpoint": null,
|
||||||
|
"epoch": 2.3696682464454977,
|
||||||
|
"eval_steps": 500,
|
||||||
|
"global_step": 1000,
|
||||||
|
"is_hyper_param_search": false,
|
||||||
|
"is_local_process_zero": true,
|
||||||
|
"is_world_process_zero": true,
|
||||||
|
"log_history": [
|
||||||
|
{
|
||||||
|
"epoch": 0.002369668246445498,
|
||||||
|
"grad_norm": 20.34552001953125,
|
||||||
|
"learning_rate": 0.0,
|
||||||
|
"loss": 10.3309,
|
||||||
|
"step": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0,
|
||||||
|
"grad_norm": 1.0193040370941162,
|
||||||
|
"learning_rate": 8.42e-05,
|
||||||
|
"loss": 5.762,
|
||||||
|
"step": 422
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0,
|
||||||
|
"eval_loss": 4.781050205230713,
|
||||||
|
"eval_runtime": 21.3156,
|
||||||
|
"eval_samples_per_second": 1292.106,
|
||||||
|
"eval_steps_per_second": 5.067,
|
||||||
|
"step": 422
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 2.0,
|
||||||
|
"grad_norm": 0.6101345419883728,
|
||||||
|
"learning_rate": 9.566919191919192e-05,
|
||||||
|
"loss": 4.6298,
|
||||||
|
"step": 844
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 2.0,
|
||||||
|
"eval_loss": 4.602915287017822,
|
||||||
|
"eval_runtime": 22.4738,
|
||||||
|
"eval_samples_per_second": 1225.514,
|
||||||
|
"eval_steps_per_second": 4.806,
|
||||||
|
"step": 844
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"logging_steps": 500,
|
||||||
|
"max_steps": 8440,
|
||||||
|
"num_input_tokens_seen": 0,
|
||||||
|
"num_train_epochs": 20,
|
||||||
|
"save_steps": 500,
|
||||||
|
"stateful_callbacks": {
|
||||||
|
"TrainerControl": {
|
||||||
|
"args": {
|
||||||
|
"should_epoch_stop": false,
|
||||||
|
"should_evaluate": false,
|
||||||
|
"should_log": false,
|
||||||
|
"should_save": true,
|
||||||
|
"should_training_stop": false
|
||||||
|
},
|
||||||
|
"attributes": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"total_flos": 1.6721514233856e+16,
|
||||||
|
"train_batch_size": 256,
|
||||||
|
"trial_name": null,
|
||||||
|
"trial_params": null
|
||||||
|
}
|
||||||
3
checkpoint-1000/training_args.bin
Normal file
3
checkpoint-1000/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e427d3053d40db71f4c8bd911dbb23cf2c24d836bdfda62c5985c167b6b35982
|
||||||
|
size 5969
|
||||||
33
checkpoint-1500/config.json
Normal file
33
checkpoint-1500/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
checkpoint-1500/generation_config.json
Normal file
9
checkpoint-1500/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
checkpoint-1500/model.safetensors
Normal file
3
checkpoint-1500/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:2bea0cc98321effad125f2c8a42376e2fe97e1b90bbb79a36b3c6c4d62287327
|
||||||
|
size 435544704
|
||||||
3
checkpoint-1500/optimizer.pt
Normal file
3
checkpoint-1500/optimizer.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:98f6c51d435c008620fff70aaf77d1b88632777f8d74036e1a28fd10f5a848f4
|
||||||
|
size 871183627
|
||||||
3
checkpoint-1500/rng_state.pth
Normal file
3
checkpoint-1500/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:fc2813b8a922e0c23d0f4adc18080d4e249bb5a6744e07e8ffbb61985084723e
|
||||||
|
size 14709
|
||||||
3
checkpoint-1500/scaler.pt
Normal file
3
checkpoint-1500/scaler.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:ca372268f4fa9335030c0cb7aedb6cdba75f457da50e7a4034abb1a2d0843689
|
||||||
|
size 1383
|
||||||
3
checkpoint-1500/scheduler.pt
Normal file
3
checkpoint-1500/scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:82c7ec661ac7177cc83e5e0338458a0443c18572a396cf491d1c8a649e80ac5e
|
||||||
|
size 1465
|
||||||
8
checkpoint-1500/special_tokens_map.json
Normal file
8
checkpoint-1500/special_tokens_map.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"mask_token": "<mask>",
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"sep_token": "<sep>",
|
||||||
|
"unk_token": "<unk>"
|
||||||
|
}
|
||||||
149783
checkpoint-1500/tokenizer.json
Normal file
149783
checkpoint-1500/tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
62
checkpoint-1500/tokenizer_config.json
Normal file
62
checkpoint-1500/tokenizer_config.json
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
{
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<pad>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"3": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"4": {
|
||||||
|
"content": "<mask>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"5": {
|
||||||
|
"content": "<sep>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"extra_special_tokens": {},
|
||||||
|
"mask_token": "<mask>",
|
||||||
|
"model_max_length": 1000000000000000019884624838656,
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"sep_token": "<sep>",
|
||||||
|
"tokenizer_class": "PreTrainedTokenizerFast",
|
||||||
|
"unk_token": "<unk>"
|
||||||
|
}
|
||||||
86
checkpoint-1500/trainer_state.json
Normal file
86
checkpoint-1500/trainer_state.json
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
{
|
||||||
|
"best_global_step": 1266,
|
||||||
|
"best_metric": 4.522061347961426,
|
||||||
|
"best_model_checkpoint": null,
|
||||||
|
"epoch": 3.5545023696682465,
|
||||||
|
"eval_steps": 500,
|
||||||
|
"global_step": 1500,
|
||||||
|
"is_hyper_param_search": false,
|
||||||
|
"is_local_process_zero": true,
|
||||||
|
"is_world_process_zero": true,
|
||||||
|
"log_history": [
|
||||||
|
{
|
||||||
|
"epoch": 0.002369668246445498,
|
||||||
|
"grad_norm": 20.34552001953125,
|
||||||
|
"learning_rate": 0.0,
|
||||||
|
"loss": 10.3309,
|
||||||
|
"step": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0,
|
||||||
|
"grad_norm": 1.0193040370941162,
|
||||||
|
"learning_rate": 8.42e-05,
|
||||||
|
"loss": 5.762,
|
||||||
|
"step": 422
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0,
|
||||||
|
"eval_loss": 4.781050205230713,
|
||||||
|
"eval_runtime": 21.3156,
|
||||||
|
"eval_samples_per_second": 1292.106,
|
||||||
|
"eval_steps_per_second": 5.067,
|
||||||
|
"step": 422
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 2.0,
|
||||||
|
"grad_norm": 0.6101345419883728,
|
||||||
|
"learning_rate": 9.566919191919192e-05,
|
||||||
|
"loss": 4.6298,
|
||||||
|
"step": 844
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 2.0,
|
||||||
|
"eval_loss": 4.602915287017822,
|
||||||
|
"eval_runtime": 22.4738,
|
||||||
|
"eval_samples_per_second": 1225.514,
|
||||||
|
"eval_steps_per_second": 4.806,
|
||||||
|
"step": 844
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 3.0,
|
||||||
|
"grad_norm": 0.5042136907577515,
|
||||||
|
"learning_rate": 9.03409090909091e-05,
|
||||||
|
"loss": 4.5095,
|
||||||
|
"step": 1266
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 3.0,
|
||||||
|
"eval_loss": 4.522061347961426,
|
||||||
|
"eval_runtime": 31.9704,
|
||||||
|
"eval_samples_per_second": 861.483,
|
||||||
|
"eval_steps_per_second": 3.378,
|
||||||
|
"step": 1266
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"logging_steps": 500,
|
||||||
|
"max_steps": 8440,
|
||||||
|
"num_input_tokens_seen": 0,
|
||||||
|
"num_train_epochs": 20,
|
||||||
|
"save_steps": 500,
|
||||||
|
"stateful_callbacks": {
|
||||||
|
"TrainerControl": {
|
||||||
|
"args": {
|
||||||
|
"should_epoch_stop": false,
|
||||||
|
"should_evaluate": false,
|
||||||
|
"should_log": false,
|
||||||
|
"should_save": true,
|
||||||
|
"should_training_stop": false
|
||||||
|
},
|
||||||
|
"attributes": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"total_flos": 2.5082271350784e+16,
|
||||||
|
"train_batch_size": 256,
|
||||||
|
"trial_name": null,
|
||||||
|
"trial_params": null
|
||||||
|
}
|
||||||
3
checkpoint-1500/training_args.bin
Normal file
3
checkpoint-1500/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e427d3053d40db71f4c8bd911dbb23cf2c24d836bdfda62c5985c167b6b35982
|
||||||
|
size 5969
|
||||||
33
checkpoint-2000/config.json
Normal file
33
checkpoint-2000/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
9
checkpoint-2000/generation_config.json
Normal file
9
checkpoint-2000/generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.1"
|
||||||
|
}
|
||||||
3
checkpoint-2000/model.safetensors
Normal file
3
checkpoint-2000/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:c2ebfacf93e0bdcdb8fd60ef4d0c4bdcabb64d18f8099c254942d44f6a667e23
|
||||||
|
size 435544704
|
||||||
3
checkpoint-2000/optimizer.pt
Normal file
3
checkpoint-2000/optimizer.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:33ad8130cd597f7ee3355aed2b23b7d937fb2c32917b1148350e5afe13e2521b
|
||||||
|
size 871183627
|
||||||
3
checkpoint-2000/rng_state.pth
Normal file
3
checkpoint-2000/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e41155f5567a230dc7c8832a30bff8969893abb533133e26d69ea67d1f6aede4
|
||||||
|
size 14709
|
||||||
3
checkpoint-2000/scaler.pt
Normal file
3
checkpoint-2000/scaler.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:f4aa03f6e0cd07cf67ce1fbe3101d545f5771ef9148b9debf02b11cf6948da5c
|
||||||
|
size 1383
|
||||||
3
checkpoint-2000/scheduler.pt
Normal file
3
checkpoint-2000/scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:4dd7845c8b8eee05d9f5b0e34d31b977df0957611b8ae1b3155d47a863d6d83f
|
||||||
|
size 1465
|
||||||
8
checkpoint-2000/special_tokens_map.json
Normal file
8
checkpoint-2000/special_tokens_map.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"mask_token": "<mask>",
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"sep_token": "<sep>",
|
||||||
|
"unk_token": "<unk>"
|
||||||
|
}
|
||||||
149783
checkpoint-2000/tokenizer.json
Normal file
149783
checkpoint-2000/tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
62
checkpoint-2000/tokenizer_config.json
Normal file
62
checkpoint-2000/tokenizer_config.json
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
{
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<pad>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"3": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"4": {
|
||||||
|
"content": "<mask>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"5": {
|
||||||
|
"content": "<sep>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"extra_special_tokens": {},
|
||||||
|
"mask_token": "<mask>",
|
||||||
|
"model_max_length": 1000000000000000019884624838656,
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"sep_token": "<sep>",
|
||||||
|
"tokenizer_class": "PreTrainedTokenizerFast",
|
||||||
|
"unk_token": "<unk>"
|
||||||
|
}
|
||||||
101
checkpoint-2000/trainer_state.json
Normal file
101
checkpoint-2000/trainer_state.json
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
{
|
||||||
|
"best_global_step": 1688,
|
||||||
|
"best_metric": 4.47515869140625,
|
||||||
|
"best_model_checkpoint": null,
|
||||||
|
"epoch": 4.739336492890995,
|
||||||
|
"eval_steps": 500,
|
||||||
|
"global_step": 2000,
|
||||||
|
"is_hyper_param_search": false,
|
||||||
|
"is_local_process_zero": true,
|
||||||
|
"is_world_process_zero": true,
|
||||||
|
"log_history": [
|
||||||
|
{
|
||||||
|
"epoch": 0.002369668246445498,
|
||||||
|
"grad_norm": 20.34552001953125,
|
||||||
|
"learning_rate": 0.0,
|
||||||
|
"loss": 10.3309,
|
||||||
|
"step": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0,
|
||||||
|
"grad_norm": 1.0193040370941162,
|
||||||
|
"learning_rate": 8.42e-05,
|
||||||
|
"loss": 5.762,
|
||||||
|
"step": 422
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0,
|
||||||
|
"eval_loss": 4.781050205230713,
|
||||||
|
"eval_runtime": 21.3156,
|
||||||
|
"eval_samples_per_second": 1292.106,
|
||||||
|
"eval_steps_per_second": 5.067,
|
||||||
|
"step": 422
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 2.0,
|
||||||
|
"grad_norm": 0.6101345419883728,
|
||||||
|
"learning_rate": 9.566919191919192e-05,
|
||||||
|
"loss": 4.6298,
|
||||||
|
"step": 844
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 2.0,
|
||||||
|
"eval_loss": 4.602915287017822,
|
||||||
|
"eval_runtime": 22.4738,
|
||||||
|
"eval_samples_per_second": 1225.514,
|
||||||
|
"eval_steps_per_second": 4.806,
|
||||||
|
"step": 844
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 3.0,
|
||||||
|
"grad_norm": 0.5042136907577515,
|
||||||
|
"learning_rate": 9.03409090909091e-05,
|
||||||
|
"loss": 4.5095,
|
||||||
|
"step": 1266
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 3.0,
|
||||||
|
"eval_loss": 4.522061347961426,
|
||||||
|
"eval_runtime": 31.9704,
|
||||||
|
"eval_samples_per_second": 861.483,
|
||||||
|
"eval_steps_per_second": 3.378,
|
||||||
|
"step": 1266
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 4.0,
|
||||||
|
"grad_norm": 0.5082475543022156,
|
||||||
|
"learning_rate": 8.501262626262628e-05,
|
||||||
|
"loss": 4.4388,
|
||||||
|
"step": 1688
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 4.0,
|
||||||
|
"eval_loss": 4.47515869140625,
|
||||||
|
"eval_runtime": 31.848,
|
||||||
|
"eval_samples_per_second": 864.796,
|
||||||
|
"eval_steps_per_second": 3.391,
|
||||||
|
"step": 1688
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"logging_steps": 500,
|
||||||
|
"max_steps": 8440,
|
||||||
|
"num_input_tokens_seen": 0,
|
||||||
|
"num_train_epochs": 20,
|
||||||
|
"save_steps": 500,
|
||||||
|
"stateful_callbacks": {
|
||||||
|
"TrainerControl": {
|
||||||
|
"args": {
|
||||||
|
"should_epoch_stop": false,
|
||||||
|
"should_evaluate": false,
|
||||||
|
"should_log": false,
|
||||||
|
"should_save": true,
|
||||||
|
"should_training_stop": false
|
||||||
|
},
|
||||||
|
"attributes": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"total_flos": 3.3443028467712e+16,
|
||||||
|
"train_batch_size": 256,
|
||||||
|
"trial_name": null,
|
||||||
|
"trial_params": null
|
||||||
|
}
|
||||||
3
checkpoint-2000/training_args.bin
Normal file
3
checkpoint-2000/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:e427d3053d40db71f4c8bd911dbb23cf2c24d836bdfda62c5985c167b6b35982
|
||||||
|
size 5969
|
||||||
33
checkpoint-2500/config.json
Normal file
33
checkpoint-2500/config.json
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
{
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 12,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"transformers_version": "4.56.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 30000
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user