From 0a24b4340150a4cfe91745aeb3eedffd3408c553 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sun, 12 Apr 2026 15:02:02 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: llm-jp/optimal-sparsity-code-d2048-E32-k8-13.6B-A3.9B Source: Original Platform --- .gitattributes | 55 + README.md | 31 + config.json | 32 + configuration.json | 1 + model-00001-of-00006.safetensors | 3 + model-00002-of-00006.safetensors | 3 + model-00003-of-00006.safetensors | 3 + model-00004-of-00006.safetensors | 3 + model-00005-of-00006.safetensors | 3 + model-00006-of-00006.safetensors | 3 + model.safetensors.index.json | 1658 ++++++++++++++++++++++++++++++ special_tokens_map.json | 10 + tokenizer.json | 3 + tokenizer_config.json | 18 + 14 files changed, 1826 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 configuration.json create mode 100644 model-00001-of-00006.safetensors create mode 100644 model-00002-of-00006.safetensors create mode 100644 model-00003-of-00006.safetensors create mode 100644 model-00004-of-00006.safetensors create mode 100644 model-00005-of-00006.safetensors create mode 100644 model-00006-of-00006.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..0e5da7d --- /dev/null +++ b/.gitattributes @@ -0,0 +1,55 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00003-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00002-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00005-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00001-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00006-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00004-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2b002e9 --- /dev/null +++ b/README.md @@ -0,0 +1,31 @@ +--- +pipeline_tag: text-generation +library_name: transformers +license: apache-2.0 +tags: + - mixtral + - moe + - reasoning +--- + +# Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks + +This repository contains model checkpoints from the paper [Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks](https://huggingface.co/papers/2508.18672). + +For more details, including code and evaluation procedures, please refer to the official GitHub repository: [https://github.com/rioyokotalab/optimal-sparsity](https://github.com/rioyokotalab/optimal-sparsity) + +## How to cite + +If you find our work helpful, please feel free to cite the paper. + +```bibtex +@inproceedings{ + nakamura2026optimal, + title={Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks}, + author={Taishi Nakamura and Satoki Ishikawa and Masaki Kawamura and Takumi Okamoto and Daisuke Nohara and Jun Suzuki and Rio Yokota}, + booktitle={The Fourteenth International Conference on Learning Representations}, + year={2026}, + url={https://openreview.net/forum?id=XFw2EPRUUR} +} +``` + diff --git a/config.json b/config.json new file mode 100644 index 0000000..97427e1 --- /dev/null +++ b/config.json @@ -0,0 +1,32 @@ +{ + "architectures": [ + "MixtralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 4096, + "max_position_embeddings": 4096, + "mlp_bias": false, + "model_type": "mixtral", + "num_attention_heads": 16, + "num_experts_per_tok": 8, + "num_hidden_layers": 16, + "num_key_value_heads": 16, + "num_local_experts": 32, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000, + "router_aux_loss_coef": 0.01, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.0", + "use_cache": true, + "vocab_size": 99584 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/model-00001-of-00006.safetensors b/model-00001-of-00006.safetensors new file mode 100644 index 0000000..054e9cd --- /dev/null +++ b/model-00001-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:880967010aa62358d5cefd04a9951dc3515ebc377f4f4cd65025e0bc7d674cf0 +size 4990348864 diff --git a/model-00002-of-00006.safetensors b/model-00002-of-00006.safetensors new file mode 100644 index 0000000..20edc92 --- /dev/null +++ b/model-00002-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997214c8e6f37aa67a1a2434d95232fe46632bfb639a96746f930f580db6ef90 +size 4999649200 diff --git a/model-00003-of-00006.safetensors b/model-00003-of-00006.safetensors new file mode 100644 index 0000000..4aafa3c --- /dev/null +++ b/model-00003-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3be08cc2311999679c02efb8bb0915c52ecb10070d5a000ca9f761793654156 +size 4999649400 diff --git a/model-00004-of-00006.safetensors b/model-00004-of-00006.safetensors new file mode 100644 index 0000000..4954a43 --- /dev/null +++ b/model-00004-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd6b0de49e20ff9b1ea4f4351eb9d0602e2af465a35ebfcb1d1a9f57321ac317 +size 4999649440 diff --git a/model-00005-of-00006.safetensors b/model-00005-of-00006.safetensors new file mode 100644 index 0000000..bbcff08 --- /dev/null +++ b/model-00005-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:000d9d86ba4e57d3cefb3698a5f9ab5bd5d9efa2d2706e8b5469580bb29fb439 +size 4999649400 diff --git a/model-00006-of-00006.safetensors b/model-00006-of-00006.safetensors new file mode 100644 index 0000000..5a229c9 --- /dev/null +++ b/model-00006-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd4c81e284cd7df55097717d3a02cbb3ae17c541c72ebdf1d521620c61c4642 +size 2135967120 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..6a32dab --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1658 @@ +{ + "metadata": { + "total_size": 27124699136 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00006.safetensors", + "model.norm.weight": "model-00006-of-00006.safetensors", + "lm_head.weight": "model-00006-of-00006.safetensors" + } +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..8644c8f --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,10 @@ +{ + "bos_token": "", + "cls_token": "", + "eod_token": "", + "eos_token": "", + "mask_token": "", + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..fc80107 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955dc1fa623fab38cc92a3f4ee172423ae6d73201c4207569bfdf5626bc733f0 +size 6416433 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..09aa857 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,18 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "unk_token": "", + "bos_token": "", + "eos_token": "", + "pad_token": "", + "cls_token": "", + "sep_token": "", + "eod_token": "", + "mask_token": "", + "extra_ids": 0, + "sp_model_kwargs": {}, + "model_max_length": 1000000000000000019884624838656, + "clean_up_tokenization_spaces": false, + "special_tokens_map_file": null, + "tokenizer_class": "PreTrainedTokenizerFast" +}