commit 0f4a3d70032d5d926a32da02e655240af611d327 Author: ModelHub XC Date: Wed May 6 08:12:36 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: HuggingFaceH4/EleutherAI_pythia-6.9b-deduped_sft_tldr Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..21b3632 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,49 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +tokenizer.json filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..bc5f30d --- /dev/null +++ b/README.md @@ -0,0 +1,199 @@ +--- +library_name: transformers +tags: [] +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + +This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated. + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..b2e2de3 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "cleanrl/EleutherAI_pythia-6.9b-deduped__sft__tldr", + "architectures": [ + "GPTNeoXForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "bos_token_id": 0, + "classifier_dropout": 0.1, + "eos_token_id": 0, + "hidden_act": "gelu", + "hidden_dropout": 0.0, + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 16384, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 2048, + "model_type": "gpt_neox", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "rope_scaling": null, + "rotary_emb_base": 10000, + "rotary_pct": 0.25, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_parallel_residual": true, + "vocab_size": 50432 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..a3a900a --- /dev/null +++ b/generation_config.json @@ -0,0 +1,5 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/model-00001-of-00006.safetensors b/model-00001-of-00006.safetensors new file mode 100644 index 0000000..797a1d2 --- /dev/null +++ b/model-00001-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef00597b35bd987c8aaf65e96d62e725f740db815c866f91204f03ec658e5383 +size 4853947896 diff --git a/model-00002-of-00006.safetensors b/model-00002-of-00006.safetensors new file mode 100644 index 0000000..310443a --- /dev/null +++ b/model-00002-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:427fcce71a26deb7a28bb0f2bb925d0768d1ccc03ab884564c94a99669150122 +size 4833124648 diff --git a/model-00003-of-00006.safetensors b/model-00003-of-00006.safetensors new file mode 100644 index 0000000..fb43e42 --- /dev/null +++ b/model-00003-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a145b1515fcf2cec1f694a9684a9a20320ae49dda4b9c97b43f61972117a6b +size 4833124720 diff --git a/model-00004-of-00006.safetensors b/model-00004-of-00006.safetensors new file mode 100644 index 0000000..5d75aa3 --- /dev/null +++ b/model-00004-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e5487a56301bed804ae23bc9a2d07037509c4ed0cd7e61f92805fef5a9f696 +size 4833124720 diff --git a/model-00005-of-00006.safetensors b/model-00005-of-00006.safetensors new file mode 100644 index 0000000..d0b3209 --- /dev/null +++ b/model-00005-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0148fc441004eff2e0694d509fc65c108a2897cdd8fe1304e3c29731316f789 +size 4833124720 diff --git a/model-00006-of-00006.safetensors b/model-00006-of-00006.safetensors new file mode 100644 index 0000000..0dbf506 --- /dev/null +++ b/model-00006-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3925785b6c052f1d69f066b4fcca0fb2ba105f8f9a02f4e3b07e2a46f10936a7 +size 3242807336 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..ce60e93 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,395 @@ +{ + "metadata": { + "total_size": 27429208064 + }, + "weight_map": { + "embed_out.weight": "model-00006-of-00006.safetensors", + "gpt_neox.embed_in.weight": "model-00001-of-00006.safetensors", + "gpt_neox.final_layer_norm.bias": "model-00006-of-00006.safetensors", + "gpt_neox.final_layer_norm.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.0.attention.dense.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.attention.dense.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.attention.query_key_value.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.attention.query_key_value.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.input_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.mlp.dense_4h_to_h.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.mlp.dense_4h_to_h.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.mlp.dense_h_to_4h.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.mlp.dense_h_to_4h.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.post_attention_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.attention.dense.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.attention.dense.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.attention.query_key_value.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.attention.query_key_value.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.input_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.mlp.dense_4h_to_h.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.mlp.dense_4h_to_h.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.mlp.dense_h_to_4h.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.mlp.dense_h_to_4h.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.post_attention_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.10.attention.dense.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.attention.dense.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.attention.query_key_value.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.attention.query_key_value.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.input_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.mlp.dense_4h_to_h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.mlp.dense_4h_to_h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.mlp.dense_h_to_4h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.mlp.dense_h_to_4h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.post_attention_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.11.attention.dense.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.11.attention.dense.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.11.attention.query_key_value.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.11.attention.query_key_value.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.11.input_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.11.input_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.11.mlp.dense_4h_to_h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.11.mlp.dense_4h_to_h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.11.mlp.dense_h_to_4h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.11.mlp.dense_h_to_4h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.11.post_attention_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.12.attention.dense.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.attention.dense.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.attention.query_key_value.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.attention.query_key_value.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.input_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.input_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.mlp.dense_4h_to_h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.mlp.dense_4h_to_h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.mlp.dense_h_to_4h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.mlp.dense_h_to_4h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.post_attention_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.12.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.attention.dense.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.attention.dense.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.attention.query_key_value.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.attention.query_key_value.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.input_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.input_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.mlp.dense_4h_to_h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.mlp.dense_4h_to_h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.mlp.dense_h_to_4h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.mlp.dense_h_to_4h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.post_attention_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.13.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.attention.dense.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.attention.dense.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.attention.query_key_value.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.attention.query_key_value.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.input_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.input_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.mlp.dense_4h_to_h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.mlp.dense_4h_to_h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.mlp.dense_h_to_4h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.mlp.dense_h_to_4h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.post_attention_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.14.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.attention.dense.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.attention.dense.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.attention.query_key_value.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.attention.query_key_value.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.input_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.mlp.dense_4h_to_h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.mlp.dense_4h_to_h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.mlp.dense_h_to_4h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.mlp.dense_h_to_4h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.post_attention_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.attention.dense.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.attention.dense.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.attention.query_key_value.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.attention.query_key_value.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.input_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.mlp.dense_4h_to_h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.mlp.dense_4h_to_h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.mlp.dense_h_to_4h.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.mlp.dense_h_to_4h.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.post_attention_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.16.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.17.attention.dense.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.17.attention.dense.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.17.attention.query_key_value.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.17.attention.query_key_value.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.17.input_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.17.input_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.17.mlp.dense_4h_to_h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.17.mlp.dense_4h_to_h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.17.mlp.dense_h_to_4h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.17.mlp.dense_h_to_4h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.17.post_attention_layernorm.bias": "model-00003-of-00006.safetensors", + "gpt_neox.layers.17.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "gpt_neox.layers.18.attention.dense.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.attention.dense.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.attention.query_key_value.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.attention.query_key_value.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.input_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.input_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.mlp.dense_4h_to_h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.mlp.dense_4h_to_h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.mlp.dense_h_to_4h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.mlp.dense_h_to_4h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.post_attention_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.18.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.attention.dense.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.attention.dense.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.attention.query_key_value.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.attention.query_key_value.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.input_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.input_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.mlp.dense_4h_to_h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.mlp.dense_4h_to_h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.mlp.dense_h_to_4h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.mlp.dense_h_to_4h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.post_attention_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.19.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.2.attention.dense.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.attention.dense.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.attention.query_key_value.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.attention.query_key_value.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.input_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.mlp.dense_4h_to_h.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.mlp.dense_4h_to_h.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.mlp.dense_h_to_4h.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.mlp.dense_h_to_4h.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.post_attention_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.20.attention.dense.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.attention.dense.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.attention.query_key_value.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.attention.query_key_value.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.input_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.input_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.mlp.dense_4h_to_h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.mlp.dense_4h_to_h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.mlp.dense_h_to_4h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.mlp.dense_h_to_4h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.post_attention_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.20.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.attention.dense.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.attention.dense.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.attention.query_key_value.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.attention.query_key_value.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.input_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.mlp.dense_4h_to_h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.mlp.dense_4h_to_h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.mlp.dense_h_to_4h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.mlp.dense_h_to_4h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.post_attention_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.attention.dense.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.attention.dense.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.attention.query_key_value.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.attention.query_key_value.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.input_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.input_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.mlp.dense_4h_to_h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.mlp.dense_4h_to_h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.mlp.dense_h_to_4h.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.mlp.dense_h_to_4h.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.post_attention_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.22.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.23.attention.dense.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.23.attention.dense.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.23.attention.query_key_value.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.23.attention.query_key_value.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.23.input_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.23.input_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.23.mlp.dense_4h_to_h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.23.mlp.dense_4h_to_h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.23.mlp.dense_h_to_4h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.23.mlp.dense_h_to_4h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.23.post_attention_layernorm.bias": "model-00004-of-00006.safetensors", + "gpt_neox.layers.23.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "gpt_neox.layers.24.attention.dense.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.attention.dense.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.attention.query_key_value.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.attention.query_key_value.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.input_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.input_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.mlp.dense_4h_to_h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.mlp.dense_4h_to_h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.mlp.dense_h_to_4h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.mlp.dense_h_to_4h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.post_attention_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.24.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.attention.dense.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.attention.dense.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.attention.query_key_value.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.attention.query_key_value.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.input_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.input_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.mlp.dense_4h_to_h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.mlp.dense_4h_to_h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.mlp.dense_h_to_4h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.mlp.dense_h_to_4h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.post_attention_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.25.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.attention.dense.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.attention.dense.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.attention.query_key_value.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.attention.query_key_value.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.input_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.input_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.mlp.dense_4h_to_h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.mlp.dense_4h_to_h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.mlp.dense_h_to_4h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.mlp.dense_h_to_4h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.post_attention_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.26.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.attention.dense.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.attention.dense.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.attention.query_key_value.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.attention.query_key_value.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.input_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.input_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.mlp.dense_4h_to_h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.mlp.dense_4h_to_h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.mlp.dense_h_to_4h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.mlp.dense_h_to_4h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.post_attention_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.27.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.attention.dense.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.attention.dense.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.attention.query_key_value.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.attention.query_key_value.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.input_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.input_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.mlp.dense_4h_to_h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.mlp.dense_4h_to_h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.mlp.dense_h_to_4h.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.mlp.dense_h_to_4h.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.post_attention_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.28.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.29.attention.dense.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.29.attention.dense.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.29.attention.query_key_value.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.29.attention.query_key_value.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.29.input_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.29.input_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.29.mlp.dense_4h_to_h.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.29.mlp.dense_4h_to_h.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.29.mlp.dense_h_to_4h.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.29.mlp.dense_h_to_4h.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.29.post_attention_layernorm.bias": "model-00005-of-00006.safetensors", + "gpt_neox.layers.29.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "gpt_neox.layers.3.attention.dense.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.attention.dense.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.attention.query_key_value.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.attention.query_key_value.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.input_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.mlp.dense_4h_to_h.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.mlp.dense_4h_to_h.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.mlp.dense_h_to_4h.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.mlp.dense_h_to_4h.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.post_attention_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.30.attention.dense.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.attention.dense.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.attention.query_key_value.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.attention.query_key_value.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.input_layernorm.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.input_layernorm.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.mlp.dense_4h_to_h.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.mlp.dense_4h_to_h.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.mlp.dense_h_to_4h.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.mlp.dense_h_to_4h.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.post_attention_layernorm.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.30.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.attention.dense.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.attention.dense.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.attention.query_key_value.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.attention.query_key_value.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.input_layernorm.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.input_layernorm.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.mlp.dense_4h_to_h.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.mlp.dense_4h_to_h.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.mlp.dense_h_to_4h.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.mlp.dense_h_to_4h.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.post_attention_layernorm.bias": "model-00006-of-00006.safetensors", + "gpt_neox.layers.31.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "gpt_neox.layers.4.attention.dense.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.attention.dense.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.attention.query_key_value.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.attention.query_key_value.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.input_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.mlp.dense_4h_to_h.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.mlp.dense_4h_to_h.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.mlp.dense_h_to_4h.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.mlp.dense_h_to_4h.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.post_attention_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.5.attention.dense.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.5.attention.dense.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.5.attention.query_key_value.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.5.attention.query_key_value.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.5.input_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.5.input_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.5.mlp.dense_4h_to_h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.5.mlp.dense_4h_to_h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.5.mlp.dense_h_to_4h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.5.mlp.dense_h_to_4h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.5.post_attention_layernorm.bias": "model-00001-of-00006.safetensors", + "gpt_neox.layers.5.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "gpt_neox.layers.6.attention.dense.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.attention.dense.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.attention.query_key_value.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.attention.query_key_value.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.input_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.mlp.dense_4h_to_h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.mlp.dense_4h_to_h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.mlp.dense_h_to_4h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.mlp.dense_h_to_4h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.post_attention_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.attention.dense.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.attention.dense.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.attention.query_key_value.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.attention.query_key_value.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.input_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.mlp.dense_4h_to_h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.mlp.dense_4h_to_h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.mlp.dense_h_to_4h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.mlp.dense_h_to_4h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.post_attention_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.attention.dense.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.attention.dense.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.attention.query_key_value.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.attention.query_key_value.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.input_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.mlp.dense_4h_to_h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.mlp.dense_4h_to_h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.mlp.dense_h_to_4h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.mlp.dense_h_to_4h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.post_attention_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.attention.dense.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.attention.dense.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.attention.query_key_value.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.attention.query_key_value.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.input_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.mlp.dense_4h_to_h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.mlp.dense_4h_to_h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.mlp.dense_h_to_4h.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.mlp.dense_h_to_4h.weight": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.post_attention_layernorm.bias": "model-00002-of-00006.safetensors", + "gpt_neox.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..2f2c4ef --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..0285a20 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1d6db37fd865e8666639b1f84039d2b5c62fe738699366c8eee45ab6981fe06 +size 2114223 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..932f981 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,223 @@ +{ + "add_bos_token": false, + "add_eos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50254": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50255": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50256": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50257": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50258": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50259": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50260": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50261": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50262": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50263": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50264": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50265": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50266": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50267": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50268": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50269": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50270": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50271": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50272": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50273": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50274": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50275": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50276": { + "content": " ", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "50277": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|endoftext|>", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "tokenizer_class": "GPTNeoXTokenizer", + "unk_token": "<|endoftext|>" +}