commit eaa7a47bbcf066b6b15d12bbbcd62a7006456e5c
Author: ModelHub XC <noreply@modelhub.org.cn>
Date:   Sun Apr 12 10:53:00 2026 +0800

    初始化项目，由ModelHub XC社区提供模型
    
    Model: NeverSleep/Noromaid-v0.1-mixtral-8x7b-Instruct-v3
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..a6344aa
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9ce5540
--- /dev/null
+++ b/README.md
@@ -0,0 +1,92 @@
+---
+license: cc-by-nc-4.0
+---
+
+
+![image/png](https://cdn-uploads.huggingface.co/production/uploads/630dfb008df86f1e5becadc3/vwcJfOnL-2QDJ0ShfxRJ5.png)
+
+
+
+---
+
+# Disclaimer:
+## This model is experimental, do not expect everything to work.
+
+This model uses the Alpaca **prompting format** (or just directly download the SillyTavern instruct preset [here](https://files.catbox.moe/0ohmco.json)).
+
+---
+
+
+Beeg noromaid on ***steroids***. Suitable for RP, ERP.
+
+This time based on Mixtral Instruct, seems to do wonders!
+
+This model was trained for 8h (v1) + 8h (v2) + 12h (v3) on customized modified datasets, focusing on RP, uncensoring, and a modified version of the Alpaca prompting (that was already used in LimaRP), which should be at the same conversational level as ChatML or Llama2-Chat without adding any additional special tokens.
+
+If you want more info about this model (and v1 + v2), you can check out [my blog post](https://ikaridevgit.github.io/index.html?p=7&blog=blogid-6&bo=true).
+
+[Recommended settings - Settings 1](https://huggingface.co/NeverSleep/Noromaid-v0.1-mixtral-8x7b-v3/discussions/1)
+
+[Recommended settings - Settings 2 (idk if they are any good)](https://files.catbox.moe/fv4xhu.json)
+
+## Credits:
+- Undi
+- IkariDev
+
+<!-- description start -->
+## Description
+
+<!-- [Recommended settings - contributed by localfultonextractor](https://files.catbox.moe/ue0tja.json) -->
+
+This repo contains FP16 files of Noromaid-v0.1-mixtral-8x7b-Instruct-v3.
+
+[FP16 - by IkariDev and Undi](https://huggingface.co/NeverSleep/Noromaid-v0.1-mixtral-8x7b-Instruct-v3)
+
+<!-- [GGUF - By TheBloke](https://huggingface.co/TheBloke/Athena-v4-GGUF)-->
+
+<!-- [GPTQ - By TheBloke](https://huggingface.co/TheBloke/Athena-v4-GPTQ)-->
+
+<!-- [exl2[8bpw-8h] - by AzureBlack](https://huggingface.co/AzureBlack/Echidna-13b-v0.3-8bpw-8h-exl2)-->
+
+<!-- [AWQ - By TheBloke](https://huggingface.co/TheBloke/Athena-v4-AWQ)-->
+
+<!-- [fp16 - by IkariDev+Undi95](https://huggingface.co/IkariDev/Athena-v4)-->
+
+[GGUF - by IkariDev and Undi](https://huggingface.co/NeverSleep/Noromaid-v0.1-mixtral-8x7b-Instruct-v3-GGUF)
+<!-- [OLD(GGUF - by IkariDev+Undi95)](https://huggingface.co/IkariDev/Athena-v4-GGUF)-->
+
+## Ratings:
+
+Note: We have permission from all users to upload their ratings; we DON'T screenshot random reviews without asking if we can put them here!
+
+No ratings yet!
+
+If you want your rating to be here, send us a message on Discord and we'll put up a screenshot of it here. Our Discord names are "ikaridev" and "undi".
+
+<!-- description end -->
+<!-- prompt-template start -->
+### Custom format:
+```
+### Instruction:
+{system prompt}
+
+### Input:
+{input}
+
+### Response:
+{reply}
+```
+
+## Datasets used:
+
+- Aesir 1 and 2 ([MinervaAI](https://huggingface.co/MinervaAI) / [Gryphe](https://huggingface.co/Gryphe))
+- [LimaRP-20231109](https://huggingface.co/datasets/lemonilia/LimaRP) ([Lemonilia](https://huggingface.co/lemonilia))
+- [ToxicDPO-NoWarning](https://huggingface.co/datasets/Undi95/toxic-dpo-v0.1-sharegpt) ([unalignment orga repo](https://huggingface.co/unalignment) + [Undi](https://huggingface.co/Undi95))
+- [No-robots-ShareGPT](https://huggingface.co/datasets/Doctor-Shotgun/no-robots-sharegpt) ([Doctor-Shotgun](https://huggingface.co/Doctor-Shotgun))
+
+
+## Others
+
+Undi: If you want to support me, you can [here](https://ko-fi.com/undiai).
+
+IkariDev: Visit my [retro/neocities style website](https://ikaridevgit.github.io/) please kek
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..75368d0
--- /dev/null
+++ b/config.json
@@ -0,0 +1,30 @@
+{
+  "_name_or_path": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+  "architectures": [
+    "MixtralForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "model_type": "mixtral",
+  "num_attention_heads": 32,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "num_local_experts": 8,
+  "output_router_logits": true,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 1000000.0,
+  "router_aux_loss_coef": 0.02,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.36.2",
+  "use_cache": false,
+  "vocab_size": 32000
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..c533f93
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.36.2"
+}
diff --git a/pytorch_model-00001-of-00019.bin b/pytorch_model-00001-of-00019.bin
new file mode 100644
index 0000000..a9c39c2
--- /dev/null
+++ b/pytorch_model-00001-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:944dc4d54308f6d651934949757fa9c347928065fbe598d456e54b6dca9f39c2
+size 4892820801
diff --git a/pytorch_model-00002-of-00019.bin b/pytorch_model-00002-of-00019.bin
new file mode 100644
index 0000000..fdf7158
--- /dev/null
+++ b/pytorch_model-00002-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a24a090d27be77b32fa11656168039bda06bc32ac7a901d0a2df6be15f6e8b2a
+size 4983016125
diff --git a/pytorch_model-00003-of-00019.bin b/pytorch_model-00003-of-00019.bin
new file mode 100644
index 0000000..40cd5b7
--- /dev/null
+++ b/pytorch_model-00003-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c2e83d42856bdc94bf5952875019441a28dfdd69ce8989d67997f5aae517c3f
+size 4983016209
diff --git a/pytorch_model-00004-of-00019.bin b/pytorch_model-00004-of-00019.bin
new file mode 100644
index 0000000..85287ee
--- /dev/null
+++ b/pytorch_model-00004-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9197be8d0694eb9e02fe5bb14f855590401867a850c0beb7254aed9e0036b683
+size 4899045759
diff --git a/pytorch_model-00005-of-00019.bin b/pytorch_model-00005-of-00019.bin
new file mode 100644
index 0000000..097c48a
--- /dev/null
+++ b/pytorch_model-00005-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff3f302fee1e6e5549c4eb8b0c9c964fe97c9e0ccf558fb8926aa1f6797d14a1
+size 4983016161
diff --git a/pytorch_model-00006-of-00019.bin b/pytorch_model-00006-of-00019.bin
new file mode 100644
index 0000000..0476f72
--- /dev/null
+++ b/pytorch_model-00006-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9882c03b747df93a2911628c24bd344aea3f01cee04f790062d7f7f3eb75409f
+size 4983016125
diff --git a/pytorch_model-00007-of-00019.bin b/pytorch_model-00007-of-00019.bin
new file mode 100644
index 0000000..748fe7e
--- /dev/null
+++ b/pytorch_model-00007-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1f357b5eb896df9fcfce9f1db4ec04548b571f2305e7e7e8de454440ef3096a
+size 4899045759
diff --git a/pytorch_model-00008-of-00019.bin b/pytorch_model-00008-of-00019.bin
new file mode 100644
index 0000000..7baaa37
--- /dev/null
+++ b/pytorch_model-00008-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e7bc9b97aa9fe1854c0097b3e719ae9fc4897d2d1bc2dd273f0bc34fcc99cf9
+size 4983016185
diff --git a/pytorch_model-00009-of-00019.bin b/pytorch_model-00009-of-00019.bin
new file mode 100644
index 0000000..c9d0658
--- /dev/null
+++ b/pytorch_model-00009-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1136dc942f87b92979648f476464476c657f30d5a0b457c20e97461bed828bb0
+size 4983016125
diff --git a/pytorch_model-00010-of-00019.bin b/pytorch_model-00010-of-00019.bin
new file mode 100644
index 0000000..b5b8787
--- /dev/null
+++ b/pytorch_model-00010-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5dad23664d9945c85c9284ca4f8ba298edfdd0dbde4b16271f26e9ec52327d4
+size 4899045759
diff --git a/pytorch_model-00011-of-00019.bin b/pytorch_model-00011-of-00019.bin
new file mode 100644
index 0000000..f38e976
--- /dev/null
+++ b/pytorch_model-00011-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:885af9d95b351304dbc9804cee2155a4809107c419e1794fc071e18171ea31b2
+size 4983016149
diff --git a/pytorch_model-00012-of-00019.bin b/pytorch_model-00012-of-00019.bin
new file mode 100644
index 0000000..854da90
--- /dev/null
+++ b/pytorch_model-00012-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69aeedb1a62a6875d214f596dc56123f4ad268397913479f3720164690d03001
+size 4983016149
diff --git a/pytorch_model-00013-of-00019.bin b/pytorch_model-00013-of-00019.bin
new file mode 100644
index 0000000..c63ee61
--- /dev/null
+++ b/pytorch_model-00013-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b21a536a109405ef3d5a9e9008927dc8a8e4fd6d566c7152cde6d5f661023b81
+size 4983016125
diff --git a/pytorch_model-00014-of-00019.bin b/pytorch_model-00014-of-00019.bin
new file mode 100644
index 0000000..6c22fea
--- /dev/null
+++ b/pytorch_model-00014-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4117f0c4adaf62ae4179b80d60f8b4cf095317c8cdee3b707a21a8e1cd951431
+size 4899045759
diff --git a/pytorch_model-00015-of-00019.bin b/pytorch_model-00015-of-00019.bin
new file mode 100644
index 0000000..8ed490f
--- /dev/null
+++ b/pytorch_model-00015-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9305bc0f6cda73bc60f1679430faceb9cf284e29971858fecc8ef1b24783b329
+size 4983016185
diff --git a/pytorch_model-00016-of-00019.bin b/pytorch_model-00016-of-00019.bin
new file mode 100644
index 0000000..712fe4d
--- /dev/null
+++ b/pytorch_model-00016-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79ca0bbef7e63f9565ec06d1f349fe25860b12831d62496a548821e0631d3e63
+size 4983016125
diff --git a/pytorch_model-00017-of-00019.bin b/pytorch_model-00017-of-00019.bin
new file mode 100644
index 0000000..750edd8
--- /dev/null
+++ b/pytorch_model-00017-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdcdb2420de7552ba0fa300185a9aedde7c6d4e5f0c710eff29bb3c3b6c1edeb
+size 4899045759
diff --git a/pytorch_model-00018-of-00019.bin b/pytorch_model-00018-of-00019.bin
new file mode 100644
index 0000000..c6b88a6
--- /dev/null
+++ b/pytorch_model-00018-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f264f38ca462b776f0d441e0382ce7bdf9848aa2fe992ab111671ad556da152
+size 4983016161
diff --git a/pytorch_model-00019-of-00019.bin b/pytorch_model-00019-of-00019.bin
new file mode 100644
index 0000000..3bbb24e
--- /dev/null
+++ b/pytorch_model-00019-of-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac74a18ea2e0d9e59b8f09ea1076b21ab73442e1846e8d11116a27a1209bf3ed
+size 4221688679
diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json
new file mode 100644
index 0000000..c105000
--- /dev/null
+++ b/pytorch_model.bin.index.json
@@ -0,0 +1,1002 @@
+{
+  "metadata": {
+    "total_size": 93405585408
+  },
+  "weight_map": {
+    "lm_head.weight": "pytorch_model-00019-of-00019.bin",
+    "model.embed_tokens.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.block_sparse_moe.gate.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.block_sparse_moe.gate.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.input_layernorm.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.block_sparse_moe.gate.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.10.input_layernorm.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.11.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.11.block_sparse_moe.gate.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.input_layernorm.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00007-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.block_sparse_moe.gate.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.input_layernorm.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.block_sparse_moe.gate.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.input_layernorm.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00008-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.block_sparse_moe.gate.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.input_layernorm.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.block_sparse_moe.gate.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.input_layernorm.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00009-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.16.block_sparse_moe.gate.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.input_layernorm.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00010-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.block_sparse_moe.gate.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.input_layernorm.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.18.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.18.block_sparse_moe.gate.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.input_layernorm.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00011-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.block_sparse_moe.gate.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.input_layernorm.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.block_sparse_moe.gate.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.input_layernorm.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.block_sparse_moe.gate.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.input_layernorm.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00012-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.block_sparse_moe.gate.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.input_layernorm.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.block_sparse_moe.gate.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.22.input_layernorm.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00013-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.23.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.23.block_sparse_moe.gate.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.input_layernorm.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00014-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.block_sparse_moe.gate.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.input_layernorm.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.block_sparse_moe.gate.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.input_layernorm.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00015-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.block_sparse_moe.gate.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.input_layernorm.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.block_sparse_moe.gate.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.27.input_layernorm.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00016-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.28.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.28.block_sparse_moe.gate.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.input_layernorm.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00017-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.block_sparse_moe.gate.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.input_layernorm.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.block_sparse_moe.gate.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.input_layernorm.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00002-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.block_sparse_moe.gate.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.input_layernorm.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00018-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.block_sparse_moe.gate.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.input_layernorm.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00019-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.block_sparse_moe.gate.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.input_layernorm.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.block_sparse_moe.gate.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.5.input_layernorm.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00003-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.6.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.6.block_sparse_moe.gate.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.input_layernorm.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00004-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.block_sparse_moe.gate.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.input_layernorm.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.block_sparse_moe.gate.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.input_layernorm.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00005-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.0.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.0.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.0.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.1.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.1.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.1.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.2.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.2.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.2.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.3.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.3.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.3.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.4.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.4.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.4.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.5.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.5.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.5.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.6.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.6.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.6.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.7.w1.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.7.w2.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.experts.7.w3.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.block_sparse_moe.gate.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.input_layernorm.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00006-of-00019.bin",
+    "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00006-of-00019.bin",
+    "model.norm.weight": "pytorch_model-00019-of-00019.bin"
+  }
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000..72ecfee
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000..8b443ef
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..bc00187
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,45 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "chat_template": "{% set ns = namespace(found=false) %}{% for message in messages %}{% if message['role'] == 'system' %}{% set ns.found = true %}{% endif %}{% endfor %}{% if not ns.found %}{{ '### Instruction:\nYou are a chatbot.\n\n' }}{% endif %}{% for message in messages %}{% if message['role'] == 'system' %}{{ '### Instruction:\n' + message['content'] + '\n\n' }}{% else %}{% if message['role'] == 'user' %}{{ '### Input:\n' + message['content'] + '\n\n' }}{% else %}{{ '### Response:\n' + message['content'] + '\n\n' }}{% endif %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '### Response:\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "trust_remote_code": false,
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false,
+  "use_fast": true
+}