初始化项目,由ModelHub XC社区提供模型
Model: PygmalionAI/pygmalion-350m Source: Original Platform
This commit is contained in:
49
.gitattributes
vendored
Normal file
49
.gitattributes
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
||||
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
*.db* filter=lfs diff=lfs merge=lfs -text
|
||||
*.ark* filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.gguf* filter=lfs diff=lfs merge=lfs -text
|
||||
*.ggml filter=lfs diff=lfs merge=lfs -text
|
||||
*.llamafile* filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
||||
24
README.md
Normal file
24
README.md
Normal file
@@ -0,0 +1,24 @@
|
||||
---
|
||||
language:
|
||||
- en
|
||||
thumbnail:
|
||||
tags:
|
||||
- convAI
|
||||
- conversational
|
||||
inference: false
|
||||
---
|
||||
# pygmalion-350m
|
||||
|
||||
# Model description
|
||||
|
||||
This is a proof-of-concept fine-tune of Facebook's OPT-350M model optimized for dialogue, to be used as a stepping stone to higher parameter models.
|
||||
|
||||
**Disclaimer:** NSFW data was included in the fine-tuning of this model. Although SFW inputs will usually result in SFW outputs, you are advised to **chat at your own risk. This model is not suitable for use by minors.**
|
||||
|
||||
# Fine-tuning process
|
||||
|
||||
This model was much easier than expected to create.
|
||||
|
||||
We used the [ColossalAI](https://www.colossalai.org/) library to fine-tune the [OPT-350M](https://huggingface.co/facebook/opt-350m) model originally trained by Facebook on The Pile. Though our initial dataset was sets of dialogue gathered from various sources totaling about 50 MB in size, early training runs revealed that the model converged after only 7% of the dataset was passed through. To alleviate this, we massively reduced the size of the dataset to only 273 KB.
|
||||
|
||||
ColossalAI's magic allowed for something incredible: this entire model was fine-tuned on a singular GPU with only 6 GB ***(!)*** of VRAM. Fine-tuning took less than an hour to complete.
|
||||
28
config.json
Normal file
28
config.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"_name_or_path": "opt-350m",
|
||||
"activation_dropout": 0.0,
|
||||
"activation_function": "relu",
|
||||
"architectures": [
|
||||
"OPTForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 2,
|
||||
"do_layer_norm_before": false,
|
||||
"dropout": 0.1,
|
||||
"eos_token_id": 2,
|
||||
"ffn_dim": 4096,
|
||||
"hidden_size": 1024,
|
||||
"init_std": 0.02,
|
||||
"layerdrop": 0.0,
|
||||
"max_position_embeddings": 2048,
|
||||
"model_type": "opt",
|
||||
"num_attention_heads": 16,
|
||||
"num_hidden_layers": 24,
|
||||
"pad_token_id": 1,
|
||||
"prefix": "</s>",
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.20.0.dev0",
|
||||
"use_cache": true,
|
||||
"vocab_size": 50272,
|
||||
"word_embed_proj_dim": 512
|
||||
}
|
||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
||||
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
||||
50001
merges.txt
Normal file
50001
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
pytorch_model.bin
Normal file
3
pytorch_model.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:356aa4ab61193d13e3e7a097bb5f2c025dc2536d5f127154889202ba3c735ae2
|
||||
size 1324917213
|
||||
1
special_tokens_map.json
Normal file
1
special_tokens_map.json
Normal file
@@ -0,0 +1 @@
|
||||
{"bos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}
|
||||
1
tokenizer_config.json
Normal file
1
tokenizer_config.json
Normal file
@@ -0,0 +1 @@
|
||||
{"errors": "replace", "unk_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "add_bos_token": true, "special_tokens_map_file": null, "name_or_path": "patrickvonplaten/opt-30b"}
|
||||
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user