初始化项目,由ModelHub XC社区提供模型
Model: Dnnn-19/my_awesome_eli5_clm-model Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
60
README.md
Normal file
60
README.md
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
---
|
||||||
|
library_name: transformers
|
||||||
|
license: apache-2.0
|
||||||
|
base_model: distilbert/distilgpt2
|
||||||
|
tags:
|
||||||
|
- generated_from_trainer
|
||||||
|
model-index:
|
||||||
|
- name: my_awesome_eli5_clm-model
|
||||||
|
results: []
|
||||||
|
---
|
||||||
|
|
||||||
|
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||||
|
should probably proofread and complete it, then remove this comment. -->
|
||||||
|
|
||||||
|
# my_awesome_eli5_clm-model
|
||||||
|
|
||||||
|
This model is a fine-tuned version of [distilbert/distilgpt2](https://huggingface.co/distilbert/distilgpt2) on an unknown dataset.
|
||||||
|
It achieves the following results on the evaluation set:
|
||||||
|
- Loss: 3.7822
|
||||||
|
|
||||||
|
## Model description
|
||||||
|
|
||||||
|
More information needed
|
||||||
|
|
||||||
|
## Intended uses & limitations
|
||||||
|
|
||||||
|
More information needed
|
||||||
|
|
||||||
|
## Training and evaluation data
|
||||||
|
|
||||||
|
More information needed
|
||||||
|
|
||||||
|
## Training procedure
|
||||||
|
|
||||||
|
### Training hyperparameters
|
||||||
|
|
||||||
|
The following hyperparameters were used during training:
|
||||||
|
- learning_rate: 2e-05
|
||||||
|
- train_batch_size: 8
|
||||||
|
- eval_batch_size: 8
|
||||||
|
- seed: 42
|
||||||
|
- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
||||||
|
- lr_scheduler_type: linear
|
||||||
|
- num_epochs: 3.0
|
||||||
|
|
||||||
|
### Training results
|
||||||
|
|
||||||
|
| Training Loss | Epoch | Step | Validation Loss |
|
||||||
|
|:-------------:|:-----:|:----:|:---------------:|
|
||||||
|
| 3.9154 | 1.0 | 1312 | 3.7935 |
|
||||||
|
| 3.8253 | 2.0 | 2624 | 3.7845 |
|
||||||
|
| 3.7862 | 3.0 | 3936 | 3.7822 |
|
||||||
|
|
||||||
|
|
||||||
|
### Framework versions
|
||||||
|
|
||||||
|
- Transformers 5.0.0
|
||||||
|
- Pytorch 2.10.0+cu128
|
||||||
|
- Datasets 4.0.0
|
||||||
|
- Tokenizers 0.22.2
|
||||||
48
config.json
Normal file
48
config.json
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
{
|
||||||
|
"_num_labels": 1,
|
||||||
|
"activation_function": "gelu_new",
|
||||||
|
"add_cross_attention": false,
|
||||||
|
"architectures": [
|
||||||
|
"GPT2LMHeadModel"
|
||||||
|
],
|
||||||
|
"attn_pdrop": 0.1,
|
||||||
|
"bos_token_id": 50256,
|
||||||
|
"dtype": "float32",
|
||||||
|
"embd_pdrop": 0.1,
|
||||||
|
"eos_token_id": 50256,
|
||||||
|
"id2label": {
|
||||||
|
"0": "LABEL_0"
|
||||||
|
},
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"label2id": {
|
||||||
|
"LABEL_0": 0
|
||||||
|
},
|
||||||
|
"layer_norm_epsilon": 1e-05,
|
||||||
|
"model_type": "gpt2",
|
||||||
|
"n_ctx": 1024,
|
||||||
|
"n_embd": 768,
|
||||||
|
"n_head": 12,
|
||||||
|
"n_inner": null,
|
||||||
|
"n_layer": 6,
|
||||||
|
"n_positions": 1024,
|
||||||
|
"pad_token_id": 50256,
|
||||||
|
"reorder_and_upcast_attn": false,
|
||||||
|
"resid_pdrop": 0.1,
|
||||||
|
"scale_attn_by_inverse_layer_idx": false,
|
||||||
|
"scale_attn_weights": true,
|
||||||
|
"summary_activation": null,
|
||||||
|
"summary_first_dropout": 0.1,
|
||||||
|
"summary_proj_to_labels": true,
|
||||||
|
"summary_type": "cls_index",
|
||||||
|
"summary_use_proj": true,
|
||||||
|
"task_specific_params": {
|
||||||
|
"text-generation": {
|
||||||
|
"do_sample": true,
|
||||||
|
"max_length": 50
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tie_word_embeddings": true,
|
||||||
|
"transformers_version": "5.0.0",
|
||||||
|
"use_cache": false,
|
||||||
|
"vocab_size": 50257
|
||||||
|
}
|
||||||
9
generation_config.json
Normal file
9
generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 50256,
|
||||||
|
"eos_token_id": [
|
||||||
|
50256
|
||||||
|
],
|
||||||
|
"pad_token_id": 50256,
|
||||||
|
"transformers_version": "5.0.0"
|
||||||
|
}
|
||||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:5e05adebe7e1c75e73a46d1a26d9c3aece4a789b1d0d5abca7797813faf05717
|
||||||
|
size 327657928
|
||||||
250306
tokenizer.json
Normal file
250306
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
12
tokenizer_config.json
Normal file
12
tokenizer_config.json
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"add_prefix_space": false,
|
||||||
|
"backend": "tokenizers",
|
||||||
|
"bos_token": "<|endoftext|>",
|
||||||
|
"eos_token": "<|endoftext|>",
|
||||||
|
"errors": "replace",
|
||||||
|
"is_local": false,
|
||||||
|
"model_max_length": 1024,
|
||||||
|
"pad_token": "<|endoftext|>",
|
||||||
|
"tokenizer_class": "GPT2Tokenizer",
|
||||||
|
"unk_token": "<|endoftext|>"
|
||||||
|
}
|
||||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:59c711f0e676df0dfcc4d0a7c5b6720f15e1b13f387185be984dce7d68af4f90
|
||||||
|
size 5201
|
||||||
Reference in New Issue
Block a user