初始化项目,由ModelHub XC社区提供模型
Model: 1234Frede/v5SmolLM2-business-news-generator Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
69
README.md
Normal file
69
README.md
Normal file
@@ -0,0 +1,69 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: apache-2.0
|
||||
base_model: HuggingFaceTB/SmolLM2-135M
|
||||
tags:
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: v5SmolLM2-business-news-generator
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# v5SmolLM2-business-news-generator
|
||||
|
||||
This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M) on an unknown dataset.
|
||||
It achieves the following results on the evaluation set:
|
||||
- Loss: 3.0644
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 5e-06
|
||||
- train_batch_size: 8
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- num_epochs: 1
|
||||
|
||||
### Training results
|
||||
|
||||
| Training Loss | Epoch | Step | Validation Loss |
|
||||
|:-------------:|:-----:|:----:|:---------------:|
|
||||
| 3.6465 | 0.08 | 100 | 3.4739 |
|
||||
| 3.3588 | 0.16 | 200 | 3.3058 |
|
||||
| 3.2424 | 0.24 | 300 | 3.2164 |
|
||||
| 3.1289 | 0.32 | 400 | 3.1625 |
|
||||
| 3.0872 | 0.4 | 500 | 3.1281 |
|
||||
| 3.0701 | 0.48 | 600 | 3.1049 |
|
||||
| 3.0425 | 0.56 | 700 | 3.0895 |
|
||||
| 3.101 | 0.64 | 800 | 3.0783 |
|
||||
| 2.9676 | 0.72 | 900 | 3.0710 |
|
||||
| 3.0192 | 0.8 | 1000 | 3.0668 |
|
||||
| 2.9593 | 0.88 | 1100 | 3.0648 |
|
||||
| 2.9943 | 0.96 | 1200 | 3.0644 |
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.1
|
||||
- Pytorch 2.8.0+cu126
|
||||
- Datasets 4.0.0
|
||||
- Tokenizers 0.22.1
|
||||
32
config.json
Normal file
32
config.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 0,
|
||||
"dtype": "float32",
|
||||
"eos_token_id": 0,
|
||||
"head_dim": 64,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 576,
|
||||
"initializer_range": 0.041666666666666664,
|
||||
"intermediate_size": 1536,
|
||||
"is_llama_config": true,
|
||||
"max_position_embeddings": 8192,
|
||||
"mlp_bias": false,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 9,
|
||||
"num_hidden_layers": 30,
|
||||
"num_key_value_heads": 3,
|
||||
"pad_token_id": 0,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_interleaved": false,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 100000,
|
||||
"tie_word_embeddings": true,
|
||||
"transformers_version": "4.57.1",
|
||||
"use_cache": true,
|
||||
"vocab_size": 49152
|
||||
}
|
||||
9
generation_config.json
Normal file
9
generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 0,
|
||||
"eos_token_id": [
|
||||
0
|
||||
],
|
||||
"pad_token_id": 0,
|
||||
"transformers_version": "4.57.1"
|
||||
}
|
||||
48901
merges.txt
Normal file
48901
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:76b8496a7bb5449ec25c2fb4a5d626ac02c12dbe6c81de9fd41941b7fb84bd70
|
||||
size 538090408
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6f03d6bd784c756abce6707dc76be8801ca3525ef9561fdcccc92aff1efddebd
|
||||
size 8429
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4f7f925c8579fb9e15ad970c89d0ac2a8a25252b1bce65d62da40d4594cf21d4
|
||||
size 15791
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4c16ff2c2cb71e4f2cfb55bdd769283e62023b606eaf3f7ffda5d83d5e4edd17
|
||||
size 6629
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c5401b6512c2ac21573b65295e79a61b239de55620a5ef9b721bfc687956bea2
|
||||
size 11321
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:97a03885ec57f6b7400c7bb41b1f530a9a041b6a4fde51b819131b3b9d0044e7
|
||||
size 5192
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:44691a3c58b15e79fb20052be3d17ba1627f9b01301f3f13ed5a57a83c56fed0
|
||||
size 8075
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:126de7931cea4c254439811ea0f093e3efefd55029f6231a7329afbc116f165c
|
||||
size 11321
|
||||
43
special_tokens_map.json
Normal file
43
special_tokens_map.json
Normal file
@@ -0,0 +1,43 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|endoftext|>",
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<repo_name>",
|
||||
"<reponame>",
|
||||
"<file_sep>",
|
||||
"<filename>",
|
||||
"<gh_stars>",
|
||||
"<issue_start>",
|
||||
"<issue_comment>",
|
||||
"<issue_closed>",
|
||||
"<jupyter_start>",
|
||||
"<jupyter_text>",
|
||||
"<jupyter_code>",
|
||||
"<jupyter_output>",
|
||||
"<jupyter_script>",
|
||||
"<empty_output>"
|
||||
],
|
||||
"bos_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": "<|endoftext|>",
|
||||
"unk_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
244949
tokenizer.json
Normal file
244949
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
169
tokenizer_config.json
Normal file
169
tokenizer_config.json
Normal file
@@ -0,0 +1,169 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"3": {
|
||||
"content": "<repo_name>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"4": {
|
||||
"content": "<reponame>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"5": {
|
||||
"content": "<file_sep>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"6": {
|
||||
"content": "<filename>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"7": {
|
||||
"content": "<gh_stars>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"8": {
|
||||
"content": "<issue_start>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"9": {
|
||||
"content": "<issue_comment>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"10": {
|
||||
"content": "<issue_closed>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"11": {
|
||||
"content": "<jupyter_start>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"12": {
|
||||
"content": "<jupyter_text>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"13": {
|
||||
"content": "<jupyter_code>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"14": {
|
||||
"content": "<jupyter_output>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"15": {
|
||||
"content": "<jupyter_script>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"16": {
|
||||
"content": "<empty_output>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|endoftext|>",
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<repo_name>",
|
||||
"<reponame>",
|
||||
"<file_sep>",
|
||||
"<filename>",
|
||||
"<gh_stars>",
|
||||
"<issue_start>",
|
||||
"<issue_comment>",
|
||||
"<issue_closed>",
|
||||
"<jupyter_start>",
|
||||
"<jupyter_text>",
|
||||
"<jupyter_code>",
|
||||
"<jupyter_output>",
|
||||
"<jupyter_script>",
|
||||
"<empty_output>"
|
||||
],
|
||||
"bos_token": "<|endoftext|>",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|endoftext|>",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 8192,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"tokenizer_class": "GPT2Tokenizer",
|
||||
"unk_token": "<|endoftext|>",
|
||||
"vocab_size": 49152
|
||||
}
|
||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3cc1f82e35e86074f5f7be88e48b6387db8b73cebbf9934700abd8b0766853c3
|
||||
size 5905
|
||||
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user