From 883c62d9696b4b1d925ef1dc3af284546a90aca2 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Mon, 11 May 2026 17:35:36 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: PygmalionAI/pygmalion-1.3b Source: Original Platform --- .gitattributes | 51 ++++++++++ README.md | 59 ++++++++++++ config.json | 96 +++++++++++++++++++ configuration.json | 1 + model.safetensors | 3 + pytorch_model.bin | 3 + special_tokens_map.json | 5 + ...out.tfevents.1672095305.lavidP6000.20829.0 | 3 + ....out.tfevents.1672097776.lavidP6000.1415.0 | 3 + ....out.tfevents.1672099494.lavidP6000.9498.0 | 3 + ...out.tfevents.1672100263.lavidP6000.13066.0 | 3 + ...out.tfevents.1672103119.lavidP6000.27877.0 | 3 + ....out.tfevents.1672154374.lavidP6000.9711.0 | 3 + ...out.tfevents.1672161448.lavidP6000.18901.0 | 3 + tokenizer.json | 3 + tokenizer_config.json | 9 ++ 16 files changed, 251 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 configuration.json create mode 100644 model.safetensors create mode 100644 pytorch_model.bin create mode 100644 special_tokens_map.json create mode 100644 tensorboard_runs/2022-12-26T_19-55-05/events.out.tfevents.1672095305.lavidP6000.20829.0 create mode 100644 tensorboard_runs/2022-12-26T_20-36-16/events.out.tfevents.1672097776.lavidP6000.1415.0 create mode 100644 tensorboard_runs/2022-12-26T_21-04-54/events.out.tfevents.1672099494.lavidP6000.9498.0 create mode 100644 tensorboard_runs/2022-12-26T_21-17-43/events.out.tfevents.1672100263.lavidP6000.13066.0 create mode 100644 tensorboard_runs/2022-12-26T_22-05-19/events.out.tfevents.1672103119.lavidP6000.27877.0 create mode 100644 
tensorboard_runs/2022-12-27T_12-19-34/events.out.tfevents.1672154374.lavidP6000.9711.0 create mode 100644 tensorboard_runs/2022-12-27T_14-17-28/events.out.tfevents.1672161448.lavidP6000.18901.0 create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6f9d61b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,51 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text + +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs 
-text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +model.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +pytorch_model.bin filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..37e7f8a --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +--- +license: agpl-3.0 +language: +- en +thumbnail: +tags: +- text generation +- conversational +inference: false + +--- + +# Pygmalion 1.3B + +## Model description + +Pygmalion 1.3B is a proof-of-concept dialogue model based on EleutherAI's [pythia-1.3b-deduped](https://huggingface.co/EleutherAI/pythia-1.3b-deduped). + +**Warning:** This model is **NOT** suitable for use by minors. It **will** output X-rated content under certain circumstances. + +## Training data + +The fine-tuning dataset consisted of 56MB of dialogue data gathered from multiple sources, which includes both real _and_ partially machine-generated conversations. + +## Training procedure + +Fine-tuning was done using [ColossalAI](https://github.com/hpcaitech/ColossalAI) (specifically, with a slightly modified version of their [OPT fine-tune example](https://github.com/hpcaitech/ColossalAI/blob/78509124d32b63b7fc36f6508e0576a326d51422/examples/language/opt/run_clm.py)) for around 11.4 million tokens over 5440 steps on a single 24GB GPU. The run took just under 21 hours. + +## Intended use + +### The easy way + +We provide a notebook with a Gradio UI for playing around with the model without having to manually format inputs. This notebook can be found [here](https://github.com/PygmalionAI/gradio-ui/blob/master/notebooks/GPU.ipynb). 
+ +### The manual way + +The model can be used as a regular text generation model, but it'll perform best if the input prompt adheres to the following format: + +``` +[CHARACTER]'s Persona: [A few sentences about the character you want the model to play] + +[DIALOGUE HISTORY] +You: [Your input message here] +[CHARACTER]: +``` + +Where `[CHARACTER]` is, as you can probably guess, the name of the character you want the model to portray, and `[DIALOGUE HISTORY]` is chat history so the model can have some conversational context to draw from. Ideally it'll be pairs of messages like: + +``` +[CHARACTER]: [some dialogue here] +You: [your response to the dialogue above] +``` + +Apart from chat history, you can also just add example conversations in `[DIALOGUE HISTORY]` to show how the character should speak - ideally at the beginning, so it doesn't get confused as to what's conversation history vs. character definition. + +## Known issues + +- The model can get stuck repeating certain phrases, or sometimes even entire sentences. + - We believe this is due to that behavior being present in the training data itself, and plan to investigate and adjust accordingly for future versions. 
diff --git a/config.json b/config.json new file mode 100644 index 0000000..d25434d --- /dev/null +++ b/config.json @@ -0,0 +1,96 @@ +{ + "_name_or_path": "EleutherAI/pythia-1.3b-deduped", + "architectures": [ + "GPTNeoXForCausalLM" + ], + "bad_words_ids": [ + [ + 434, + 15694, + 66, + 27, + 209 + ], + [ + 15362 + ], + [ + 1713 + ], + [ + 1713, + 64 + ], + [ + 1713, + 876 + ], + [ + 2016, + 251, + 857, + 75, + 9194, + 35478 + ], + [ + 2391 + ], + [ + 20340 + ], + [ + 33021 + ], + [ + 2391, + 1051 + ], + [ + 5638 + ], + [ + 2391, + 20340 + ], + [ + 5638, + 537 + ], + [ + 1559, + 2345 + ], + [ + 1559, + 7849 + ], + [ + 1559, + 17379 + ], + [ + 25321, + 4611 + ] + ], + "bos_token_id": 0, + "eos_token_id": 0, + "hidden_act": "gelu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 2048, + "model_type": "gpt_neox", + "num_attention_heads": 16, + "num_hidden_layers": 24, + "rotary_emb_base": 10000, + "rotary_pct": 0.25, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.25.1", + "use_cache": true, + "use_parallel_residual": true, + "vocab_size": 50304 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..b45f2dc --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c09c32c69cfc9430deeebc8acd6c18a3f8420c90d267737ce217e442f9673576 +size 2930002184 diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..ad0f7ff --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3228245a35970cafbebea523525daf888f8a04c433ca2e277883b0ad98da96c3 
+size 2930076797 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..0204ed1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,5 @@ +{ + "bos_token": "<|endoftext|>", + "eos_token": "<|endoftext|>", + "unk_token": "<|endoftext|>" +} diff --git a/tensorboard_runs/2022-12-26T_19-55-05/events.out.tfevents.1672095305.lavidP6000.20829.0 b/tensorboard_runs/2022-12-26T_19-55-05/events.out.tfevents.1672095305.lavidP6000.20829.0 new file mode 100644 index 0000000..ee756b4 --- /dev/null +++ b/tensorboard_runs/2022-12-26T_19-55-05/events.out.tfevents.1672095305.lavidP6000.20829.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a0d7b08dd9db7fe5cfa86f2160b5907ed98f27187a68e80982c648075bbc5d +size 14890 diff --git a/tensorboard_runs/2022-12-26T_20-36-16/events.out.tfevents.1672097776.lavidP6000.1415.0 b/tensorboard_runs/2022-12-26T_20-36-16/events.out.tfevents.1672097776.lavidP6000.1415.0 new file mode 100644 index 0000000..9f2fd46 --- /dev/null +++ b/tensorboard_runs/2022-12-26T_20-36-16/events.out.tfevents.1672097776.lavidP6000.1415.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d7b5cc42203fb0878cb3ef4c5b10ee26a4c2bf1ebd520f8d1003c221c268784 +size 23464 diff --git a/tensorboard_runs/2022-12-26T_21-04-54/events.out.tfevents.1672099494.lavidP6000.9498.0 b/tensorboard_runs/2022-12-26T_21-04-54/events.out.tfevents.1672099494.lavidP6000.9498.0 new file mode 100644 index 0000000..710aa3f --- /dev/null +++ b/tensorboard_runs/2022-12-26T_21-04-54/events.out.tfevents.1672099494.lavidP6000.9498.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0315c65928850fb05ff521feacd66e4548e1805e72b4a7b3c2ba93bad9e34c +size 11752 diff --git a/tensorboard_runs/2022-12-26T_21-17-43/events.out.tfevents.1672100263.lavidP6000.13066.0 b/tensorboard_runs/2022-12-26T_21-17-43/events.out.tfevents.1672100263.lavidP6000.13066.0 new file mode 100644 index 0000000..881e10e --- 
/dev/null +++ b/tensorboard_runs/2022-12-26T_21-17-43/events.out.tfevents.1672100263.lavidP6000.13066.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:839e9d6814b341244d9559c8d8da790924cddfde52ec7553839e0680cdb24775 +size 35176 diff --git a/tensorboard_runs/2022-12-26T_22-05-19/events.out.tfevents.1672103119.lavidP6000.27877.0 b/tensorboard_runs/2022-12-26T_22-05-19/events.out.tfevents.1672103119.lavidP6000.27877.0 new file mode 100644 index 0000000..7477289 --- /dev/null +++ b/tensorboard_runs/2022-12-26T_22-05-19/events.out.tfevents.1672103119.lavidP6000.27877.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a86a31c7adaf2d7e19fb08bbf934a9a325becc02fbe28bf01f1972b04275277 +size 597718 diff --git a/tensorboard_runs/2022-12-27T_12-19-34/events.out.tfevents.1672154374.lavidP6000.9711.0 b/tensorboard_runs/2022-12-27T_12-19-34/events.out.tfevents.1672154374.lavidP6000.9711.0 new file mode 100644 index 0000000..05871ec --- /dev/null +++ b/tensorboard_runs/2022-12-27T_12-19-34/events.out.tfevents.1672154374.lavidP6000.9711.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e98e45100f5830efc847fe0900b5eba223eacd4b1f26990245f0621252faedcb +size 82024 diff --git a/tensorboard_runs/2022-12-27T_14-17-28/events.out.tfevents.1672161448.lavidP6000.18901.0 b/tensorboard_runs/2022-12-27T_14-17-28/events.out.tfevents.1672161448.lavidP6000.18901.0 new file mode 100644 index 0000000..4ee8a55 --- /dev/null +++ b/tensorboard_runs/2022-12-27T_14-17-28/events.out.tfevents.1672161448.lavidP6000.18901.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49a383522d051485967feefab91e1e6499f2a25a099c611fdd4bfe400a20aa87 +size 234280 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..8fa6a67 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c24618a1b3e6a38167beff1c72cffd126c3a66254347304b50547d12c5f25624 +size 
2113710 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..16f1d0e --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,9 @@ +{ + "add_prefix_space": false, + "bos_token": "<|endoftext|>", + "eos_token": "<|endoftext|>", + "name_or_path": "EleutherAI/gpt-neox-20b", + "special_tokens_map_file": "/fsx/home-hailey/.cache/huggingface/hub/models--EleutherAI--gpt-neox-20b/snapshots/3523781c8df75f7741687a4284f6f70e1afa12f4/special_tokens_map.json", + "tokenizer_class": "GPTNeoXTokenizer", + "unk_token": "<|endoftext|>" +}