From 71442e79bf00e53be87115c125f2ee3a35088067 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sun, 28 Jun 2026 03:32:12 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: louisbrulenaudet/DevPearl-2x7B Source: Original Platform --- .gitattributes | 62 +++++++++++++++++++++++ README.md | 85 ++++++++++++++++++++++++++++++++ config.json | 33 +++++++++++++ configuration.json | 1 + mergekit_moe_config.yml | 11 +++++ model-00001-of-00012.safetensors | 3 ++ model-00002-of-00012.safetensors | 3 ++ model-00003-of-00012.safetensors | 3 ++ model-00004-of-00012.safetensors | 3 ++ model-00005-of-00012.safetensors | 3 ++ model-00006-of-00012.safetensors | 3 ++ model-00007-of-00012.safetensors | 3 ++ model-00008-of-00012.safetensors | 3 ++ model-00009-of-00012.safetensors | 3 ++ model-00010-of-00012.safetensors | 3 ++ model-00011-of-00012.safetensors | 3 ++ model-00012-of-00012.safetensors | 3 ++ model.safetensors.index.json | 1 + special_tokens_map.json | 30 +++++++++++ tokenizer.json | 3 ++ tokenizer.model | 3 ++ tokenizer_config.json | 83 +++++++++++++++++++++++++++++++ 22 files changed, 348 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 configuration.json create mode 100644 mergekit_moe_config.yml create mode 100644 model-00001-of-00012.safetensors create mode 100644 model-00002-of-00012.safetensors create mode 100644 model-00003-of-00012.safetensors create mode 100644 model-00004-of-00012.safetensors create mode 100644 model-00005-of-00012.safetensors create mode 100644 model-00006-of-00012.safetensors create mode 100644 model-00007-of-00012.safetensors create mode 100644 model-00008-of-00012.safetensors create mode 100644 model-00009-of-00012.safetensors create 
mode 100644 model-00010-of-00012.safetensors create mode 100644 model-00011-of-00012.safetensors create mode 100644 model-00012-of-00012.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..c977b17 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,62 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text + +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml 
filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00008-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +model-00011-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +model-00012-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00006-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +model-00001-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +model-00004-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +model-00007-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +model-00005-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.model filter=lfs diff=lfs merge=lfs -text +model-00009-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text +model-00010-of-00012.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..c301804 --- /dev/null +++ b/README.md @@ -0,0 +1,85 @@ +--- +license: cc-by-sa-4.0 +tags: +- moe +- merge +- mergekit +- lazymergekit +- deepseek-ai/deepseek-coder-6.7b-instruct +- defog/sqlcoder-7b-2 +- Python +- Javascript +- sql +base_model: +- deepseek-ai/deepseek-coder-6.7b-instruct +- defog/sqlcoder-7b-2 +language: +- en +library_name: transformers +pipeline_tag: text-generation +--- +
+ +# DevPearl-2x7B, an xtraordinary Mixture of Experts (MoE) for development + +DevPearl-2x7B is a Mixture of Experts (MoE) made with the following models: +* [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) +* [defog/sqlcoder-7b-2](https://huggingface.co/defog/sqlcoder-7b-2) + +A Mixture of Experts (MoE) model represents a sophisticated architecture that amalgamates the capabilities of multiple specialized models to address a wide array of tasks within a unified framework. Within the realm of a MoE model tailored for software development, the integration of expertise spanning two distinct domains - general-purpose programming (Python, JavaScript, Java) and SQL - substantially enhances its capacity to furnish nuanced and precise responses to a diverse spectrum of user inquiries. + +## Configuration + +```yaml +base_model: codellama/CodeLlama-7b-Instruct-hf +experts: + - source_model: deepseek-ai/deepseek-coder-6.7b-instruct + positive_prompts: + - "python" + - "javascript" + - "java" + - source_model: defog/sqlcoder-7b-2 + positive_prompts: + - "SQL" +``` + +## Usage + +```python +!pip install -qU transformers bitsandbytes accelerate + +from transformers import AutoTokenizer +import transformers +import torch + +model = "louisbrulenaudet/DevPearl-2x7B" + +tokenizer = AutoTokenizer.from_pretrained(model) +pipeline = transformers.pipeline( + "text-generation", + model=model, + model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True}, +) + +messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}] +prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) +outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95) +print(outputs[0]["generated_text"]) +``` + +## Citing & Authors + +If you use this code in your research, please use the following BibTeX entry. 
+ +```BibTeX +@misc{louisbrulenaudet2023, + author = {Louis Brulé Naudet}, + title = {DevPearl-2x7B, an xtraordinary Mixture of Experts (MoE) for development}, + year = {2024}, + howpublished = {\url{https://huggingface.co/louisbrulenaudet/DevPearl-2x7B}}, +} +``` + +## Feedback + +If you have any feedback, please reach out at [louisbrulenaudet@icloud.com](mailto:louisbrulenaudet@icloud.com). \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..f516d14 --- /dev/null +++ b/config.json @@ -0,0 +1,33 @@ +{ + "_name_or_path": "codellama/CodeLlama-7b-Instruct-hf", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 16384, + "model_type": "mixtral", + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "num_local_experts": 2, + "output_router_logits": false, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "use_cache": true, + "vocab_size": 32016 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/mergekit_moe_config.yml b/mergekit_moe_config.yml new file mode 100644 index 0000000..e14c2c4 --- /dev/null +++ b/mergekit_moe_config.yml @@ -0,0 +1,11 @@ + +base_model: codellama/CodeLlama-7b-Instruct-hf +experts: + - source_model: deepseek-ai/deepseek-coder-6.7b-instruct + positive_prompts: + - "python" + - "javascript" + - 
"java" + - source_model: defog/sqlcoder-7b-2 + positive_prompts: + - "SQL" diff --git a/model-00001-of-00012.safetensors b/model-00001-of-00012.safetensors new file mode 100644 index 0000000..daf81f8 --- /dev/null +++ b/model-00001-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53c8cfd30cb39dc40e86461f7687a0c20db266aabd73eeb01aad6a9a078112ee +size 1967667016 diff --git a/model-00002-of-00012.safetensors b/model-00002-of-00012.safetensors new file mode 100644 index 0000000..7df4194 --- /dev/null +++ b/model-00002-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22804e6a05ad508e9eef0fca1f80e2b689f857e2ba13f18df6a3d99b2093ce01 +size 1983908704 diff --git a/model-00003-of-00012.safetensors b/model-00003-of-00012.safetensors new file mode 100644 index 0000000..f2a6a18 --- /dev/null +++ b/model-00003-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aca9953073faeb0250ecb66742f3498651e11de935cf325c792f314b2bb2b9f +size 1983908704 diff --git a/model-00004-of-00012.safetensors b/model-00004-of-00012.safetensors new file mode 100644 index 0000000..d7a8061 --- /dev/null +++ b/model-00004-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a586bbb06748c4cf5997ee0e55f99a574b23a17b2867aa9fa1725d5c13755aaa +size 1983908688 diff --git a/model-00005-of-00012.safetensors b/model-00005-of-00012.safetensors new file mode 100644 index 0000000..af5d659 --- /dev/null +++ b/model-00005-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68285c2b2f3edc5e3d7c3aa0f35c036e239628f36bab7c84db1766e4f0ef46a +size 1983908704 diff --git a/model-00006-of-00012.safetensors b/model-00006-of-00012.safetensors new file mode 100644 index 0000000..d90c556 --- /dev/null +++ b/model-00006-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b95f1732c8ecc1ced3088f5a3676c848e19b7af7ae9e62b3f86d288ed773c60d +size 1983908704 diff --git a/model-00007-of-00012.safetensors b/model-00007-of-00012.safetensors new file mode 100644 index 0000000..3959d7a --- /dev/null +++ b/model-00007-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c87c000da61aace77724d31e05dece4f1f349946a596717fa832505dd6a9976 +size 1983908688 diff --git a/model-00008-of-00012.safetensors b/model-00008-of-00012.safetensors new file mode 100644 index 0000000..5011385 --- /dev/null +++ b/model-00008-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a1087589f1b1c24fd9168232c42ef3d26cd81ba668c45a28279bb699b8cbdd +size 1983908704 diff --git a/model-00009-of-00012.safetensors b/model-00009-of-00012.safetensors new file mode 100644 index 0000000..8b579de --- /dev/null +++ b/model-00009-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60445a104a01284bbdafd713facf3222f175f0154fcdfe195c9e572161048502 +size 1984174568 diff --git a/model-00010-of-00012.safetensors b/model-00010-of-00012.safetensors new file mode 100644 index 0000000..a0c2b5d --- /dev/null +++ b/model-00010-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29fa5b13ac6b4a4062830b62f4210942a12da958169bb041d23b5878b1e847e6 +size 1979718400 diff --git a/model-00011-of-00012.safetensors b/model-00011-of-00012.safetensors new file mode 100644 index 0000000..9654c36 --- /dev/null +++ b/model-00011-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac0603f095a392df414d34e59f43a5478f5d80d9059a80ad0b4a469df5fb7957 +size 1979718400 diff --git a/model-00012-of-00012.safetensors b/model-00012-of-00012.safetensors new file mode 100644 index 0000000..2c4b9b3 --- /dev/null +++ b/model-00012-of-00012.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e09ee576ca1a4edfb3d5b22b9ec4309c74a42347efe998bfc45c3ce3bf6c7225 +size 336073456 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..019e4d9 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1 @@ +{"metadata": {"mergekit_version": "0.0.4"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00012.safetensors", "model.norm.weight": "model-00001-of-00012.safetensors", "lm_head.weight": "model-00001-of-00012.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.3.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.4.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.5.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.6.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.7.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.8.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.9.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.10.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.11.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.12.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.13.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.14.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.15.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.16.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.17.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.18.input_layernorm.weight": "model-00001-of-00012.safetensors", 
"model.layers.19.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.20.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.21.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.22.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.23.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.24.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.25.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.26.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.27.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.28.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.29.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.30.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.31.input_layernorm.weight": "model-00001-of-00012.safetensors", "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00012.safetensors", "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00012.safetensors", "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00012.safetensors", "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00012.safetensors", "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00012.safetensors", "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00012.safetensors", "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00012.safetensors", "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00012.safetensors", "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00012.safetensors", "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00012.safetensors", "model.layers.5.block_sparse_moe.experts.0.w3.weight": 
"model-00001-of-00012.safetensors", "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00012.safetensors", "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00012.safetensors", "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00012.safetensors", "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00012.safetensors", "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00012.safetensors", "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", 
"model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00012.safetensors", "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00012.safetensors", "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00012.safetensors", "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00012.safetensors", "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00012.safetensors", "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00012.safetensors", "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00012.safetensors", "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00012.safetensors", "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00012.safetensors", "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.26.block_sparse_moe.experts.1.w3.weight": 
"model-00003-of-00012.safetensors", "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00012.safetensors", "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00012.safetensors", "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00012.safetensors", "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00012.safetensors", "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00012.safetensors", "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00012.safetensors", "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00012.safetensors", "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00012.safetensors", "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00012.safetensors", "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00012.safetensors", "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00012.safetensors", "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00012.safetensors", "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00012.safetensors", "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00012.safetensors", "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00012.safetensors", "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00012.safetensors", "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00012.safetensors", "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00012.safetensors", "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00012.safetensors", 
"model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00012.safetensors", "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00012.safetensors", "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00012.safetensors", "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00012.safetensors", "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00012.safetensors", "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00012.safetensors", "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00012.safetensors", "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00012.safetensors", "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00012.safetensors", "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00012.safetensors", "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00012.safetensors", "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00012.safetensors", "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00012.safetensors", "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00012.safetensors", "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.16.block_sparse_moe.experts.0.w2.weight": 
"model-00005-of-00012.safetensors", "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00012.safetensors", "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00012.safetensors", "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00012.safetensors", "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00012.safetensors", "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", 
"model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00012.safetensors", "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00012.safetensors", "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00012.safetensors", "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00012.safetensors", "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00012.safetensors", "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00012.safetensors", "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00012.safetensors", "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00012.safetensors", "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00012.safetensors", "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00012.safetensors", "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00012.safetensors", "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00012.safetensors", "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00012.safetensors", "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00012.safetensors", "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00012.safetensors", "model.layers.5.block_sparse_moe.experts.1.w1.weight": 
"model-00007-of-00012.safetensors", "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00012.safetensors", "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00012.safetensors", "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00012.safetensors", "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00012.safetensors", "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00012.safetensors", "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00012.safetensors", "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00012.safetensors", "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00012.safetensors", "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", 
"model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00012.safetensors", "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00012.safetensors", "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00012.safetensors", "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00012.safetensors", "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00012.safetensors", "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00012.safetensors", "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00012.safetensors", "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00012.safetensors", "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.27.block_sparse_moe.experts.0.w1.weight": 
"model-00009-of-00012.safetensors", "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00012.safetensors", "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00012.safetensors", "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00012.safetensors", "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00012.safetensors", "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00012.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", 
"model.layers.13.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00009-of-00012.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", 
"model.layers.4.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", 
"model.layers.29.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.30.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.31.self_attn.q_proj.weight": "model-00010-of-00012.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", 
"model.layers.22.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00010-of-00012.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00011-of-00012.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00011-of-00012.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00011-of-00012.safetensors", "model.layers.30.self_attn.k_proj.weight": "model-00011-of-00012.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00011-of-00012.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", 
"model.layers.15.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00011-of-00012.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", 
"model.layers.8.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00011-of-00012.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00012-of-00012.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00012-of-00012.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00012-of-00012.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00012-of-00012.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00012-of-00012.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00012-of-00012.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00012-of-00012.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00012-of-00012.safetensors", "model.layers.30.self_attn.o_proj.weight": "model-00012-of-00012.safetensors", "model.layers.31.self_attn.o_proj.weight": "model-00012-of-00012.safetensors", "model.layers.0.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", 
"model.layers.1.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.2.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.3.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.4.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.5.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.6.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.7.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.8.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.9.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.10.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.11.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.12.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.13.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.14.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.15.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.16.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.17.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.18.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.19.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.20.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.21.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.22.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.23.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.24.block_sparse_moe.gate.weight": 
"model-00012-of-00012.safetensors", "model.layers.25.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.26.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.27.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.28.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.29.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.30.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors", "model.layers.31.block_sparse_moe.gate.weight": "model-00012-of-00012.safetensors"}} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..79ec319 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "additional_special_tokens": [ + "▁<PRE>",
+    "▁<MID>",
+    "▁<SUF>",
+    "▁<EOT>"
+  ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..3f568e9
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d777eb650ceea7d274b60826b1369ed3748f8d7d36675af1b7be2bf2a913b544
+size 1844315
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000..f6722e8
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+size 500058
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..6a5be4b
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,83 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "▁<PRE>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "▁<SUF>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "▁<MID>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "▁<EOT>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "▁<PRE>",
+    "▁<MID>",
+    "▁<SUF>",
+    "▁<EOT>"
+  ],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "eot_token": "▁<EOT>",
+  "fill_token": "<FILL_ME>",
+  "legacy": null,
+  "middle_token": "▁<MID>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "",
+  "prefix_token": "▁<PRE>",
+  "sp_model_kwargs": {},
+  "suffix_token": "▁<SUF>",
+  "tokenizer_class": "CodeLlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}