commit 7a70b241fdcaf41f3c8b97c3aaec7c067fe495f9 Author: ModelHub XC Date: Mon Apr 13 10:59:36 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: llm-jp/optimal-sparsity-math-d2048-E64-k16-26.4B-A7.1B Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..fd2b109 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,59 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00001-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00007-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00008-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00004-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00010-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00005-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00011-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text +model-00009-of-00011.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2b002e9 --- /dev/null +++ b/README.md @@ -0,0 +1,31 @@ +--- +pipeline_tag: text-generation +library_name: transformers +license: apache-2.0 +tags: + - mixtral + - moe + - reasoning +--- + +# Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks + +This repository contains model checkpoints from the paper [Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks](https://huggingface.co/papers/2508.18672). + +For more details, including code and evaluation procedures, please refer to the official GitHub repository: [https://github.com/rioyokotalab/optimal-sparsity](https://github.com/rioyokotalab/optimal-sparsity) + +## How to cite + +If you find our work helpful, please feel free to cite the paper. + +```bibtex +@inproceedings{ + nakamura2026optimal, + title={Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks}, + author={Taishi Nakamura and Satoki Ishikawa and Masaki Kawamura and Takumi Okamoto and Daisuke Nohara and Jun Suzuki and Rio Yokota}, + booktitle={The Fourteenth International Conference on Learning Representations}, + year={2026}, + url={https://openreview.net/forum?id=XFw2EPRUUR} +} +``` + diff --git a/config.json b/config.json new file mode 100644 index 0000000..3b6a329 --- /dev/null +++ b/config.json @@ -0,0 +1,32 @@ +{ + "architectures": [ + "MixtralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 4096, + "max_position_embeddings": 4096, + "mlp_bias": false, + "model_type": "mixtral", + "num_attention_heads": 16, + "num_experts_per_tok": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 16, + "num_local_experts": 64, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000, + "router_aux_loss_coef": 0.01, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.0", + "use_cache": true, + "vocab_size": 99584 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/model-00001-of-00011.safetensors b/model-00001-of-00011.safetensors new file mode 100644 index 0000000..e08aca8 --- /dev/null +++ b/model-00001-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a574b169d11335d445fd9632e36b48ff4330b57d53322804103c994765716046 +size 4990278800 diff --git a/model-00002-of-00011.safetensors b/model-00002-of-00011.safetensors new file mode 100644 index 0000000..5be8516 --- /dev/null +++ b/model-00002-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe31a54b55b067a6902ffa5dcc5eb79d07943f1327731f747e51060520638da4 +size 4999649280 diff --git a/model-00003-of-00011.safetensors b/model-00003-of-00011.safetensors new file mode 100644 index 0000000..29f6efe --- /dev/null +++ b/model-00003-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb73246e22dd47432f30b3c0df010a8e2f7395ca0a587a160799a0fd32f9bc85 +size 4999649312 diff --git a/model-00004-of-00011.safetensors b/model-00004-of-00011.safetensors new file mode 100644 index 0000000..a8acf29 --- /dev/null +++ b/model-00004-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d1c9f15a2c248dfce7f7cdbc7bd9568606f12200d322e57d07e6ecfb04eba3 +size 4999649312 diff --git a/model-00005-of-00011.safetensors b/model-00005-of-00011.safetensors new file mode 100644 index 0000000..c3e2f01 --- /dev/null +++ b/model-00005-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2d4940d42b35c1b2ed43ffd4ec434c5cf83e323f7cebd95553c47d6bc0b2f9 +size 4999649312 diff --git a/model-00006-of-00011.safetensors b/model-00006-of-00011.safetensors new file mode 100644 index 0000000..dd77e14 --- /dev/null +++ b/model-00006-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e0a78b929c6a619377595c59b8b020fd6c5c57bd4ffb051c8757db15e286c1d +size 4985039080 diff --git a/model-00007-of-00011.safetensors b/model-00007-of-00011.safetensors new file mode 100644 index 0000000..1753460 --- /dev/null +++ b/model-00007-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a05e63444aa598152b92be95425b56a7589c031b0c8a2d2d0e2cc2d605a9290a +size 4999649472 diff --git a/model-00008-of-00011.safetensors b/model-00008-of-00011.safetensors new file mode 100644 index 0000000..10ada63 --- /dev/null +++ b/model-00008-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f6886d2cae98e8c0719603f3bddc7e8feafea3c57bc2b36d9e9ce3391c6bb6f +size 4999649544 diff --git a/model-00009-of-00011.safetensors b/model-00009-of-00011.safetensors new file mode 100644 index 0000000..bacb59c --- /dev/null +++ b/model-00009-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27bf1cb88e9f68867f4f97fe959344b14f6cfb55155e2d1540620b8a6509551c +size 4999649536 diff --git a/model-00010-of-00011.safetensors b/model-00010-of-00011.safetensors new file mode 100644 index 0000000..2b0a355 --- /dev/null +++ b/model-00010-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40aa667ee62a1bb21fe8c2117f999201d2db57c8f95801d370f7f7870e550a8f +size 4999649520 diff --git a/model-00011-of-00011.safetensors b/model-00011-of-00011.safetensors new file mode 100644 index 0000000..27f0228 --- /dev/null +++ b/model-00011-of-00011.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:080f51503e752aaf1f3e388a8daeed4ea573a0f52e4aa7d3cb3dded1e7fc9a9f +size 2924502504 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..121f738 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3194 @@ +{ + "metadata": { + "total_size": 52896600064 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00011.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00011.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w2.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w3.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w1.weight": "model-00003-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w2.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w3.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w1.weight": "model-00005-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.8.input_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00006-of-00011.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.9.input_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00006-of-00011.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.10.input_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00006-of-00011.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.11.input_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00006-of-00011.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.12.input_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00006-of-00011.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.13.input_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00006-of-00011.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.14.input_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00006-of-00011.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.15.input_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00006-of-00011.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w1.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w3.weight": "model-00006-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w2.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w1.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w3.weight": "model-00007-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w1.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w3.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w2.weight": "model-00008-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w2.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w1.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w3.weight": "model-00009-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w1.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w3.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w2.weight": "model-00010-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w1.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w3.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w2.weight": "model-00011-of-00011.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w2.weight": "model-00011-of-00011.safetensors", + "model.norm.weight": "model-00011-of-00011.safetensors", + "lm_head.weight": "model-00011-of-00011.safetensors" + } +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..8644c8f --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,10 @@ +{ + "bos_token": "", + "cls_token": "", + "eod_token": "", + "eos_token": "", + "mask_token": "", + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..fc80107 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955dc1fa623fab38cc92a3f4ee172423ae6d73201c4207569bfdf5626bc733f0 +size 6416433 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..09aa857 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,18 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "unk_token": "", + "bos_token": "", + "eos_token": "", + "pad_token": "", + "cls_token": "", + "sep_token": "", + "eod_token": "", + "mask_token": "", + "extra_ids": 0, + "sp_model_kwargs": {}, + "model_max_length": 1000000000000000019884624838656, + "clean_up_tokenization_spaces": false, + "special_tokens_map_file": null, + "tokenizer_class": "PreTrainedTokenizerFast" +}