commit 4f4cfacd368055f746b7df317e1b198a2a909a1f Author: ModelHub XC Date: Sun Apr 12 11:52:58 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: llm-jp/optimal-sparsity-code-d2048-E64-k4-26.4B-A2.3B Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9c9c49b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,60 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00004-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00005-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00009-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00008-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00007-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00006-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00010-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00011-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00001-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2b002e9 --- /dev/null +++ b/README.md @@ -0,0 +1,31 @@ +--- +pipeline_tag: text-generation +library_name: transformers +license: apache-2.0 +tags: + - mixtral + - moe + - reasoning +--- + +# Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks + +This repository contains model checkpoints from the paper [Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks](https://huggingface.co/papers/2508.18672). + +For more details, including code and evaluation procedures, please refer to the official GitHub repository: [https://github.com/rioyokotalab/optimal-sparsity](https://github.com/rioyokotalab/optimal-sparsity) + +## How to cite + +If you find our work helpful, please feel free to cite the paper. + +```bibtex +@inproceedings{ + nakamura2026optimal, + title={Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks}, + author={Taishi Nakamura and Satoki Ishikawa and Masaki Kawamura and Takumi Okamoto and Daisuke Nohara and Jun Suzuki and Rio Yokota}, + booktitle={The Fourteenth International Conference on Learning Representations}, + year={2026}, + url={https://openreview.net/forum?id=XFw2EPRUUR} +} +``` + diff --git a/config.json b/config.json new file mode 100644 index 0000000..404fab9 --- /dev/null +++ b/config.json @@ -0,0 +1,32 @@ +{ + "architectures": [ + "MixtralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 4096, + "max_position_embeddings": 4096, + "mlp_bias": false, + "model_type": "mixtral", + "num_attention_heads": 16, + "num_experts_per_tok": 4, + "num_hidden_layers": 16, + "num_key_value_heads": 16, + "num_local_experts": 64, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000, + "router_aux_loss_coef": 0.01, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.0", + "use_cache": true, + "vocab_size": 99584 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/model-00001-of-00011.safetensors b/model-00001-of-00011.safetensors new file mode 100644 index 0000000..b87742e --- /dev/null +++ b/model-00001-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd862d19352933eb6bf5b4fd37d54a94017000b9a6a5512faf7f73702d4c3100 +size 4992445944 diff --git a/model-00002-of-00011.safetensors b/model-00002-of-00011.safetensors new file mode 100644 index 0000000..992b516 --- /dev/null +++ b/model-00002-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d40b386bb4c1bc6bae8a3afd1eb14117cd8a51e2767d38a4a334614379e09d4b +size 4999649272 diff --git a/model-00003-of-00011.safetensors b/model-00003-of-00011.safetensors new file mode 100644 index 0000000..b3d0fbd --- /dev/null +++ b/model-00003-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a59ddbddd8bb0be28dd454e7f3af646291df9762d4e2f8008a322bca04fe2255 +size 4999649400 diff --git a/model-00004-of-00011.safetensors b/model-00004-of-00011.safetensors new file mode 100644 index 0000000..146708c --- /dev/null +++ b/model-00004-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:917ab5f1ca1172883fbdd970b7b399f7ce5b8fb4347191f65e9868a10d05baf9 +size 4999649440 diff --git a/model-00005-of-00011.safetensors b/model-00005-of-00011.safetensors new file mode 100644 index 0000000..61f4902 --- /dev/null +++ b/model-00005-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dabaf4cb4a5724d63f7cbd1a88018c8dab05850007f907623d4e72d7d95eb6e3 +size 4999649368 diff --git a/model-00006-of-00011.safetensors b/model-00006-of-00011.safetensors new file mode 100644 index 0000000..4f35353 --- /dev/null +++ b/model-00006-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6cddfd752de0980c3f008a9a99c6591c5e731453845d30c7ab13a2ce1661b8c +size 4999649416 diff --git a/model-00007-of-00011.safetensors b/model-00007-of-00011.safetensors new file mode 100644 index 0000000..065d03c --- /dev/null +++ b/model-00007-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f574c05e36062f3f1df48443982979cfe5462c9a6343a1cc4ebf6309bb3eb89d +size 4999649456 diff --git a/model-00008-of-00011.safetensors b/model-00008-of-00011.safetensors new file mode 100644 index 0000000..7c9c284 --- /dev/null +++ b/model-00008-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67be2f4b51fd539fb8b589f0e5fc1dc75e2e76c03ff6e9fb5dd62d9f0fa82a89 +size 4999649456 diff --git a/model-00009-of-00011.safetensors b/model-00009-of-00011.safetensors new file mode 100644 index 0000000..10e6367 --- /dev/null +++ b/model-00009-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07dbb7bb96696bf2d7b67b061ff0dfb3e3a6b281bc8609524f7b5367d0087786 +size 4999649392 diff --git a/model-00010-of-00011.safetensors b/model-00010-of-00011.safetensors new file mode 100644 index 0000000..35ac3cf --- /dev/null +++ b/model-00010-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9af4b6bfbc8ca26579c1ea60703f2b5f3d129b46d495393d1bf15a068e719f5 +size 4999649376 diff --git a/model-00011-of-00011.safetensors b/model-00011-of-00011.safetensors new file mode 100644 index 0000000..3e7db79 --- /dev/null +++ b/model-00011-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d81d7e9f8affb1193b994a02b5cbfd5a42c4cbdc35d4b8ea32be7befefdfd3 +size 2907725152 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..9e296a8 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3194 @@ +{ + "metadata": { + "total_size": 52896600064 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00011.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.norm.weight": "model-00011-of-00011.safetensors", + "lm_head.weight": "model-00011-of-00011.safetensors" + } +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..8644c8f --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,10 @@ +{ + "bos_token": "", + "cls_token": "", + "eod_token": "", + "eos_token": "", + "mask_token": "", + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..fc80107 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955dc1fa623fab38cc92a3f4ee172423ae6d73201c4207569bfdf5626bc733f0 +size 6416433 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..09aa857 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,18 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "unk_token": "", + "bos_token": "", + "eos_token": "", + "pad_token": "", + "cls_token": "", + "sep_token": "", + "eod_token": "", + "mask_token": "", + "extra_ids": 0, + "sp_model_kwargs": {}, + "model_max_length": 1000000000000000019884624838656, + "clean_up_tokenization_spaces": false, + "special_tokens_map_file": null, + "tokenizer_class": "PreTrainedTokenizerFast" +}