From 084d43aed6342a2138e585d433e9bb92a9e2ff94 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Mon, 18 May 2026 17:33:54 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: ModelSpace/GemmaX2-28-9B-v0.1 Source: Original Platform --- .gitattributes | 37 + LICENSE | 5 + NOTICE | 1 + README.md | 98 ++ config.json | 34 + generation_config.json | 8 + main.png | 3 + model-00001-of-00005.safetensors | 3 + model-00002-of-00005.safetensors | 3 + model-00003-of-00005.safetensors | 3 + model-00004-of-00005.safetensors | 3 + model-00005-of-00005.safetensors | 3 + model.safetensors.index.json | 472 ++++++++ special_tokens_map.json | 34 + tokenizer.json | 3 + tokenizer.model | 3 + tokenizer_config.json | 1759 ++++++++++++++++++++++++++++++ 17 files changed, 2472 insertions(+) create mode 100644 .gitattributes create mode 100644 LICENSE create mode 100644 NOTICE create mode 100644 README.md create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 main.png create mode 100644 model-00001-of-00005.safetensors create mode 100644 model-00002-of-00005.safetensors create mode 100644 model-00003-of-00005.safetensors create mode 100644 model-00004-of-00005.safetensors create mode 100644 model-00005-of-00005.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..f4a9b4a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs 
-text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +main.png filter=lfs diff=lfs merge=lfs -text diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a262043 --- /dev/null +++ b/LICENSE @@ -0,0 +1,5 @@ +# GemmaX2 Terms of Use + +Copyright (C) 2024 Xiaomi Corporation. + +Licensed under the [Gemma](https://ai.google.dev/gemma/terms). 
diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..59f8207 --- /dev/null +++ b/NOTICE @@ -0,0 +1 @@ +GemmaX2-28 is a model based on the Gemma2 architecture, further pre-trained and fine-tuned with instruction-based objectives across 28 languages, and Gemma is provided under and subject to the Gemma Terms of Use found at ai.google.dev/gemma/terms. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..afd3254 --- /dev/null +++ b/README.md @@ -0,0 +1,98 @@ +--- +license: gemma +license_name: license +license_link: LICENSE +metrics: +- bleu +- comet +base_model: +- ModelSpace/GemmaX2-28-9B-Pretrain +pipeline_tag: translation +library_name: transformers +language: +- ar +- bn +- cs +- de +- en +- es +- fa +- fr +- he +- hi +- id +- it +- ja +- km +- ko +- lo +- ms +- my +- nl +- pl +- pt +- ru +- th +- tl +- tr +- ur +- vi +- zh +--- + +## Updates + +New multilingual machine translation model (MiLMMT-46) is now available. Please check the [link](https://huggingface.co/collections/xiaomi-research/milmmt-46) for detailed information. + + +## Model Description + +GemmaX2-28-9B-v0.1 is an LLM-based translation model. It has been finetuned on GemmaX2-28-9B-Pretrain, which is a language model developed through continual pretraining of Gemma2-9B using a mix of 56 billion tokens from both monolingual and parallel data across 28 different languages. Please find more details in our paper: [Multilingual Machine Translation with Open Large Language Models at Practical Scale: An Empirical Study](https://arxiv.org/abs/2502.02481). + + +- **Developed by:** Xiaomi +- **Model type:** GemmaX2-28-9B-Pretrain is obtained by continually pretraining Gemma2-9B on a large amount of monolingual and parallel data. Subsequently, GemmaX2-28-9B-v0.1 is derived through supervised finetuning on a small set of high-quality translation instruction data. 
+- **Languages:** Arabic, Bengali, Czech, German, English, Spanish, Persian, French, Hebrew, Hindi, Indonesian, Italian, Japanese, Khmer, Korean, Lao, Malay, Burmese, Dutch, Polish, Portuguese, Russian, Thai, Tagalog, Turkish, Urdu, Vietnamese, Chinese. +- **GitHub:** Please find more details in our [GitHub repository](https://github.com/xiaomi-research/gemmax). + +## Model Performance + +![Experimental Result](main.png) + + +## Run the model + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer + +model_id = "ModelSpace/GemmaX2-28-9B-v0.1" +tokenizer = AutoTokenizer.from_pretrained(model_id) + +model = AutoModelForCausalLM.from_pretrained(model_id) + +text = "Translate this from Chinese to English:\nChinese: 我爱机器翻译\nEnglish:" +inputs = tokenizer(text, return_tensors="pt") + +outputs = model.generate(**inputs, max_new_tokens=512) +print(tokenizer.decode(outputs[0], skip_special_tokens=True)) +``` + + +## Citation + +```bibtex +@misc{cui2025multilingualmachinetranslationopen, + title={Multilingual Machine Translation with Open Large Language Models at Practical Scale: An Empirical Study}, + author={Menglong Cui and Pengzhi Gao and Wei Liu and Jian Luan and Bin Wang}, + year={2025}, + eprint={2502.02481}, + archivePrefix={arXiv}, + primaryClass={cs.CL}, + url={https://arxiv.org/abs/2502.02481}, +} +``` + + +## Limitations + +GemmaX2-28-9B-v0.1 only supports the 28 languages listed above and does not guarantee strong translation performance for other languages. We will continue to enhance the translation performance of GemmaX2-28-9B, and future models will be released in due course. 
diff --git a/config.json b/config.json new file mode 100644 index 0000000..a0abd4a --- /dev/null +++ b/config.json @@ -0,0 +1,34 @@ +{ + "_name_or_path": "/mnt/pfs/cuimenglong/work/paper/Pretraining/checkpoints/Gemma2-9B-best/checkpoint-35096", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 16, + "num_hidden_layers": 42, + "num_key_value_heads": 8, + "pad_token_id": 0, + "query_pre_attn_scalar": 224, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "sliding_window_size": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.44.2", + "use_cache": true, + "vocab_size": 256000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..b7f8de3 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.44.2" +} diff --git a/main.png b/main.png new file mode 100644 index 0000000..b2cb32a --- /dev/null +++ b/main.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ea0eb824b477834a1ac93e58eb87ecae6ec9fb14fd48d3743f246b75af6307 +size 795557 diff --git a/model-00001-of-00005.safetensors b/model-00001-of-00005.safetensors new file mode 100644 index 0000000..3c2fdf5 --- /dev/null +++ b/model-00001-of-00005.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1851beb256481deb2a46f115c9ba9c88812808aa734eeb113d9be376639980 
+size 4903351912 diff --git a/model-00002-of-00005.safetensors b/model-00002-of-00005.safetensors new file mode 100644 index 0000000..931f25c --- /dev/null +++ b/model-00002-of-00005.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038e4272b8534154d7fe10245c06c87afcc797ab01bd1ab44f7588d1963db57a +size 4947570872 diff --git a/model-00003-of-00005.safetensors b/model-00003-of-00005.safetensors new file mode 100644 index 0000000..20894eb --- /dev/null +++ b/model-00003-of-00005.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c26f7a8e5a5e4379f589a9b3b6a5b48d2230ffecef1c5dd1f12f1cdab58b6e1 +size 4962221464 diff --git a/model-00004-of-00005.safetensors b/model-00004-of-00005.safetensors new file mode 100644 index 0000000..c316507 --- /dev/null +++ b/model-00004-of-00005.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ffb5f324823a009e399775ffb145e816ced8c19dc53d98483c39bf2fcae892 +size 3670322200 diff --git a/model-00005-of-00005.safetensors b/model-00005-of-00005.safetensors new file mode 100644 index 0000000..4185de6 --- /dev/null +++ b/model-00005-of-00005.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60d57eab6b5d6ec33725612376aef3b36acfae86ea115f6623190bfc98d4c27a +size 1835008128 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..b3a31a4 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,472 @@ +{ + "metadata": { + "total_size": 20318419968 + }, + "weight_map": { + "lm_head.weight": "model-00005-of-00005.safetensors", + "model.embed_tokens.weight": "model-00001-of-00005.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.mlp.up_proj.weight": 
"model-00001-of-00005.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.0.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.1.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.10.post_feedforward_layernorm.weight": 
"model-00002-of-00005.safetensors", + "model.layers.10.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.11.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.11.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.12.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.12.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.12.self_attn.k_proj.weight": 
"model-00002-of-00005.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.13.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.13.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.14.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.14.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.self_attn.q_proj.weight": 
"model-00002-of-00005.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.15.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.15.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.16.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.16.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.input_layernorm.weight": 
"model-00002-of-00005.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.17.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.17.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.18.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.18.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.mlp.gate_proj.weight": 
"model-00002-of-00005.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.19.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.19.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.2.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.2.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.20.post_attention_layernorm.weight": 
"model-00003-of-00005.safetensors", + "model.layers.20.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.20.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.21.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.21.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.22.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.22.pre_feedforward_layernorm.weight": 
"model-00003-of-00005.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.23.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.23.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.24.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.24.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.self_attn.o_proj.weight": 
"model-00003-of-00005.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.25.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.25.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.26.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.26.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.self_attn.v_proj.weight": 
"model-00003-of-00005.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.27.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.27.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.28.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.28.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.29.mlp.down_proj.weight": 
"model-00003-of-00005.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.29.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.29.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.3.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.3.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + 
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.30.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.30.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.31.post_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.31.pre_feedforward_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.32.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + 
"model.layers.32.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.33.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.33.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.33.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.34.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.34.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + 
"model.layers.34.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.35.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.35.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.36.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.36.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + 
"model.layers.36.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.37.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.37.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.38.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.38.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.input_layernorm.weight": "model-00004-of-00005.safetensors", + 
"model.layers.39.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.39.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.39.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.4.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.4.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.40.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.40.mlp.up_proj.weight": 
"model-00004-of-00005.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.40.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.40.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.41.post_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.41.pre_feedforward_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.5.post_feedforward_layernorm.weight": 
"model-00001-of-00005.safetensors", + "model.layers.5.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.6.post_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.6.pre_feedforward_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.7.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.7.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + 
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.8.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.8.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.9.post_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.9.pre_feedforward_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.9.self_attn.v_proj.weight": 
"model-00002-of-00005.safetensors", + "model.norm.weight": "model-00004-of-00005.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..8d6368f --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,34 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..5771f48 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..796efe9 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..1ededd3 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,1759 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, 
+ "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + 
}, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": 
"\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + 
}, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", 
+ "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}