commit a5448ae591dcf048929ffe2ec765d0b9e8b2ad03 Author: ModelHub XC Date: Sun Apr 12 06:41:56 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: llm-jp/optimal-sparsity-math-d1024-E128-k4-13.2B-A670M Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9bf7100 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,55 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00005-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00004-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00001-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00006-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2b002e9 --- /dev/null +++ b/README.md @@ -0,0 +1,31 @@ +--- +pipeline_tag: text-generation +library_name: transformers +license: apache-2.0 +tags: + - mixtral + - moe + - reasoning +--- + +# Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks + +This repository contains model checkpoints from the paper [Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks](https://huggingface.co/papers/2508.18672). + +For more details, including code and evaluation procedures, please refer to the official GitHub repository: [https://github.com/rioyokotalab/optimal-sparsity](https://github.com/rioyokotalab/optimal-sparsity) + +## How to cite + +If you find our work helpful, please feel free to cite the paper. + +```bibtex +@inproceedings{ + nakamura2026optimal, + title={Optimal Sparsity of Mixture-of-Experts Language Models for Reasoning Tasks}, + author={Taishi Nakamura and Satoki Ishikawa and Masaki Kawamura and Takumi Okamoto and Daisuke Nohara and Jun Suzuki and Rio Yokota}, + booktitle={The Fourteenth International Conference on Learning Representations}, + year={2026}, + url={https://openreview.net/forum?id=XFw2EPRUUR} +} +``` + diff --git a/config.json b/config.json new file mode 100644 index 0000000..aefc717 --- /dev/null +++ b/config.json @@ -0,0 +1,32 @@ +{ + "architectures": [ + "MixtralForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 2048, + "max_position_embeddings": 4096, + "mlp_bias": false, + "model_type": "mixtral", + "num_attention_heads": 8, + "num_experts_per_tok": 4, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "num_local_experts": 128, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000, + "router_aux_loss_coef": 0.01, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.0", + "use_cache": true, + "vocab_size": 99584 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/model-00001-of-00006.safetensors b/model-00001-of-00006.safetensors new file mode 100644 index 0000000..f72d847 --- /dev/null +++ b/model-00001-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f10b946b4800dbce55c6cd9e41bf823a32f69d68e711f9fb255b6a4b0690a21 +size 4998261504 diff --git a/model-00002-of-00006.safetensors b/model-00002-of-00006.safetensors new file mode 100644 index 0000000..a3346a8 --- /dev/null +++ b/model-00002-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df40735b7927c2da5668c344dc711c0d67d647dc143af349b5501419cef600e8 +size 4999766216 diff --git a/model-00003-of-00006.safetensors b/model-00003-of-00006.safetensors new file mode 100644 index 0000000..da737d6 --- /dev/null +++ b/model-00003-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cd030c6894064a9bd20aa7b4868134fb373fbf0c24cedc7bbd57af07804dfef +size 4999766176 diff --git a/model-00004-of-00006.safetensors b/model-00004-of-00006.safetensors new file mode 100644 index 0000000..7f7dc13 --- /dev/null +++ b/model-00004-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9999ae4e011a2d25f3020435496d2bc53549fd9d99e6cae0a052eb00ccb9c857 +size 4999767040 diff --git a/model-00005-of-00006.safetensors b/model-00005-of-00006.safetensors new file mode 100644 index 0000000..b846bbf --- /dev/null +++ b/model-00005-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:331092b347caf05eaf33d815c3e63dca1db1982d112b351dd95e0af917f00b33 +size 4999767384 diff --git a/model-00006-of-00006.safetensors b/model-00006-of-00006.safetensors new file mode 100644 index 0000000..7c134d8 --- /dev/null +++ b/model-00006-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a4684838191de99ac771439a588476c59dcf291edbeddd8d6de76e3aeca9d18 +size 1319669968 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..e79cdf8 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,6266 @@ +{ + "metadata": { + "total_size": 26316179456 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.64.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.64.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.65.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.65.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.66.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.66.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.67.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.67.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.68.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.68.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.69.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.69.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.70.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.70.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.71.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.71.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.72.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.72.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.73.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.73.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.74.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.74.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.75.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.75.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.76.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.76.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.77.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.77.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.78.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.78.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.79.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.79.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.80.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.80.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.81.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.81.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.82.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.82.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.83.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.83.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.84.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.84.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.85.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.85.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.86.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.86.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.87.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.87.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.88.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.88.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.89.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.89.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.90.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.90.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.91.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.91.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.92.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.92.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.93.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.93.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.94.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.94.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.95.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.95.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.96.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.96.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.97.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.97.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.98.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.98.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.99.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.99.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.100.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.100.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.101.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.101.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.102.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.102.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.103.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.103.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.104.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.104.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.105.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.105.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.106.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.106.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.107.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.107.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.108.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.108.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.109.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.109.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.110.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.110.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.111.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.111.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.112.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.112.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.113.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.113.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.114.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.114.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.115.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.115.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.116.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.116.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.117.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.117.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.118.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.118.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.119.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.119.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.120.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.120.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.121.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.121.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.122.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.122.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.123.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.123.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.124.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.124.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.125.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.125.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.126.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.126.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.127.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.127.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.20.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.21.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.22.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.23.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.24.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.25.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.26.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.27.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.28.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.29.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.30.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.31.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.32.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.33.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.34.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.35.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.36.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.37.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.38.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.39.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.40.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.41.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.42.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.43.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.44.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.45.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.46.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.47.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.48.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.49.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.50.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.51.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.52.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.53.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.54.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.55.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.56.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.57.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.58.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.59.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.60.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.61.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.62.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.63.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.64.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.65.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.66.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.67.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.68.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.69.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.70.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.71.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.72.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.73.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.74.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.75.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.76.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.77.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.78.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.79.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.80.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.81.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.82.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.83.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.84.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.85.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.86.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.87.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.88.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.89.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.90.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.91.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.92.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.93.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.94.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.95.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.96.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.97.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.98.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.99.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.100.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.101.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.102.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.103.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.104.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.105.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.106.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.107.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.108.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.109.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.110.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.111.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.112.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.113.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.114.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.115.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.116.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.117.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.118.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.119.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.120.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.121.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.122.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.123.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.124.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.125.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.126.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.0.block_sparse_moe.experts.127.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.64.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.64.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.65.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.65.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.66.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.66.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.67.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.67.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.68.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.68.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.69.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.69.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.70.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.70.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.71.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.71.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.72.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.72.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.73.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.73.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.74.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.74.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.75.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.75.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.76.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.76.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.77.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.77.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.78.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.78.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.79.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.79.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.80.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.80.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.81.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.81.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.82.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.82.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.83.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.83.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.84.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.84.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.85.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.85.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.86.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.86.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.87.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.87.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.88.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.88.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.89.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.89.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.90.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.90.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.91.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.91.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.92.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.92.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.93.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.93.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.94.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.94.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.95.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.95.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.96.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.96.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.97.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.97.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.98.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.98.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.99.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.99.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.100.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.100.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.101.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.101.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.102.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.102.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.103.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.103.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.104.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.104.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.105.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.105.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.106.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.106.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.107.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.107.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.108.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.108.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.109.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.109.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.110.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.110.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.111.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.111.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.112.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.112.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.113.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.113.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.114.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.114.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.115.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.115.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.116.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.116.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.117.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.117.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.118.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.118.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.119.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.119.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.120.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.120.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.121.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.121.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.122.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.122.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.123.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.123.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.124.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.124.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.125.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.125.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.126.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.126.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.127.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.127.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.20.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.21.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.22.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.23.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.24.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.25.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.26.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.27.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.28.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.29.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.30.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.31.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.32.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.33.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.34.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.35.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.36.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.37.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.38.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.39.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.40.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.41.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.42.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.43.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.44.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.45.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.46.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.47.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.48.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.49.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.50.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.51.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.52.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.53.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.54.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.55.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.56.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.57.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.58.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.59.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.60.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.61.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.62.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.63.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.64.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.65.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.66.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.67.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.68.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.69.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.70.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.71.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.72.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.73.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.74.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.75.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.76.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.77.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.78.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.79.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.80.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.81.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.82.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.83.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.84.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.85.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.86.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.87.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.88.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.89.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.90.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.91.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.92.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.93.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.94.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.95.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.96.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.97.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.98.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.99.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.100.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.101.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.102.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.103.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.104.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.105.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.106.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.107.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.108.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.109.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.110.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.111.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.112.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.113.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.114.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.115.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.116.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.117.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.118.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.119.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.120.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.121.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.122.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.123.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.124.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.125.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.126.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.1.block_sparse_moe.experts.127.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.64.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.64.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.65.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.65.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.66.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.66.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.67.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.67.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.68.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.68.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.69.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.69.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.70.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.70.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.71.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.71.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.72.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.72.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.73.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.73.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.74.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.74.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.75.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.75.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.76.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.76.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.77.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.77.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.78.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.78.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.79.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.79.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.80.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.80.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.81.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.81.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.82.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.82.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.83.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.83.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.84.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.84.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.85.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.85.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.86.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.86.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.87.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.87.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.88.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.88.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.89.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.89.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.90.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.90.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.91.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.91.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.92.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.92.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.93.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.93.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.94.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.94.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.95.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.95.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.96.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.96.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.97.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.97.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.98.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.98.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.99.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.99.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.100.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.100.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.101.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.101.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.102.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.102.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.103.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.103.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.104.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.104.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.105.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.105.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.106.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.106.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.107.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.107.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.108.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.108.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.109.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.109.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.110.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.110.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.111.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.111.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.112.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.112.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.113.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.113.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.114.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.114.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.115.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.115.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.116.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.116.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.117.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.117.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.118.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.118.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.119.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.119.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.120.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.120.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.121.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.121.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.122.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.122.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.123.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.123.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.124.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.124.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.125.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.125.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.126.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.126.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.127.w1.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.127.w3.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.20.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.21.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.22.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.23.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.24.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.25.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.26.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.27.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.28.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.29.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.30.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.31.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.32.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.33.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.34.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.35.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.36.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.37.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.38.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.39.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.40.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.41.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.42.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.43.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.44.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.45.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.46.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.47.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.48.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.49.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.50.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.51.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.52.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.53.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.54.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.55.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.56.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.57.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.58.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.59.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.60.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.61.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.62.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.63.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.64.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.65.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.66.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.67.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.68.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.69.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.70.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.71.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.72.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.73.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.74.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.75.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.76.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.77.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.78.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.79.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.80.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.81.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.82.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.83.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.84.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.85.w2.weight": "model-00001-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.86.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.87.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.88.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.89.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.90.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.91.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.92.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.93.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.94.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.95.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.96.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.97.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.98.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.99.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.100.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.101.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.102.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.103.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.104.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.105.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.106.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.107.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.108.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.109.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.110.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.111.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.112.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.113.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.114.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.115.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.116.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.117.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.118.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.119.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.120.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.121.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.122.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.123.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.124.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.125.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.126.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.2.block_sparse_moe.experts.127.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.64.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.64.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.65.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.65.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.66.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.66.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.67.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.67.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.68.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.68.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.69.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.69.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.70.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.70.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.71.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.71.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.72.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.72.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.73.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.73.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.74.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.74.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.75.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.75.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.76.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.76.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.77.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.77.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.78.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.78.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.79.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.79.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.80.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.80.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.81.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.81.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.82.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.82.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.83.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.83.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.84.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.84.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.85.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.85.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.86.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.86.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.87.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.87.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.88.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.88.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.89.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.89.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.90.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.90.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.91.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.91.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.92.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.92.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.93.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.93.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.94.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.94.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.95.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.95.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.96.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.96.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.97.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.97.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.98.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.98.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.99.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.99.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.100.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.100.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.101.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.101.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.102.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.102.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.103.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.103.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.104.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.104.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.105.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.105.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.106.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.106.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.107.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.107.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.108.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.108.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.109.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.109.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.110.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.110.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.111.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.111.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.112.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.112.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.113.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.113.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.114.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.114.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.115.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.115.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.116.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.116.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.117.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.117.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.118.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.118.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.119.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.119.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.120.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.120.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.121.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.121.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.122.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.122.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.123.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.123.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.124.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.124.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.125.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.125.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.126.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.126.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.127.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.127.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.16.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.17.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.18.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.19.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.24.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.25.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.26.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.27.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.28.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.29.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.30.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.31.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.32.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.33.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.34.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.35.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.36.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.37.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.38.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.39.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.40.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.41.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.42.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.43.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.44.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.45.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.46.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.47.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.48.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.49.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.50.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.51.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.52.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.53.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.54.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.55.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.56.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.57.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.58.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.59.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.60.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.61.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.62.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.63.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.64.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.65.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.66.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.67.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.68.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.69.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.70.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.71.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.72.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.73.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.74.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.75.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.76.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.77.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.78.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.79.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.80.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.81.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.82.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.83.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.84.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.85.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.86.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.87.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.88.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.89.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.90.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.91.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.92.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.93.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.94.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.95.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.96.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.97.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.98.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.99.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.100.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.101.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.102.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.103.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.104.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.105.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.106.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.107.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.108.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.109.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.110.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.111.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.112.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.113.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.114.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.115.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.116.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.117.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.118.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.119.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.120.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.121.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.122.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.123.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.124.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.125.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.126.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.3.block_sparse_moe.experts.127.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.64.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.64.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.65.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.65.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.66.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.66.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.67.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.67.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.68.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.68.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.69.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.69.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.70.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.70.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.71.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.71.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.72.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.72.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.73.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.73.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.74.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.74.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.75.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.75.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.76.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.76.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.77.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.77.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.78.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.78.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.79.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.79.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.80.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.80.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.81.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.81.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.82.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.82.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.83.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.83.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.84.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.84.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.85.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.85.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.86.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.86.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.87.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.87.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.88.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.88.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.89.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.89.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.90.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.90.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.91.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.91.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.92.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.92.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.93.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.93.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.94.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.94.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.95.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.95.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.96.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.96.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.97.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.97.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.98.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.98.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.99.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.99.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.100.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.100.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.101.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.101.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.102.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.102.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.103.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.103.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.104.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.104.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.105.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.105.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.106.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.106.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.107.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.107.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.108.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.108.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.109.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.109.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.110.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.110.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.111.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.111.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.112.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.112.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.113.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.113.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.114.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.114.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.115.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.115.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.116.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.116.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.117.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.117.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.118.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.118.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.119.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.119.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.120.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.120.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.121.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.121.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.122.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.122.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.123.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.123.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.124.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.124.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.125.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.125.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.126.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.126.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.127.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.127.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.16.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.17.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.18.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.19.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.24.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.25.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.26.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.27.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.28.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.29.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.30.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.31.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.32.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.33.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.34.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.35.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.36.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.37.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.38.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.39.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.40.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.41.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.42.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.43.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.44.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.45.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.46.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.47.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.48.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.49.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.50.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.51.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.52.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.53.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.54.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.55.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.56.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.57.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.58.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.59.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.60.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.61.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.62.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.63.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.64.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.65.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.66.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.67.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.68.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.69.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.70.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.71.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.72.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.73.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.74.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.75.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.76.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.77.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.78.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.79.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.80.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.81.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.82.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.83.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.84.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.85.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.86.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.87.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.88.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.89.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.90.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.91.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.92.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.93.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.94.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.95.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.96.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.97.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.98.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.99.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.100.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.101.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.102.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.103.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.104.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.105.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.106.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.107.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.108.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.109.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.110.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.111.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.112.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.113.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.114.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.115.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.116.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.117.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.118.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.119.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.120.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.121.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.122.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.123.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.124.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.125.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.126.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.4.block_sparse_moe.experts.127.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.64.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.64.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.65.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.65.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.66.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.66.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.67.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.67.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.68.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.68.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.69.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.69.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.70.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.70.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.71.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.71.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.72.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.72.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.73.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.73.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.74.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.74.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.75.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.75.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.76.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.76.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.77.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.77.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.78.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.78.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.79.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.79.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.80.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.80.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.81.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.81.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.82.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.82.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.83.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.83.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.84.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.84.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.85.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.85.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.86.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.86.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.87.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.87.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.88.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.88.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.89.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.89.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.90.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.90.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.91.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.91.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.92.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.92.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.93.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.93.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.94.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.94.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.95.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.95.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.96.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.96.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.97.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.97.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.98.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.98.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.99.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.99.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.100.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.100.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.101.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.101.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.102.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.102.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.103.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.103.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.104.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.104.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.105.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.105.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.106.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.106.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.107.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.107.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.108.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.108.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.109.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.109.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.110.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.110.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.111.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.111.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.112.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.112.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.113.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.113.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.114.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.114.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.115.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.115.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.116.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.116.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.117.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.117.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.118.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.118.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.119.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.119.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.120.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.120.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.121.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.121.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.122.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.122.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.123.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.123.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.124.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.124.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.125.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.125.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.126.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.126.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.127.w1.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.127.w3.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.16.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.17.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.18.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.19.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.20.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.21.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.22.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.23.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.24.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.25.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.26.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.27.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.28.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.29.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.30.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.31.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.32.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.33.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.34.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.35.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.36.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.37.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.38.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.39.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.40.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.41.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.42.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.43.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.44.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.45.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.46.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.47.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.48.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.49.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.50.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.51.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.52.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.53.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.54.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.55.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.56.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.57.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.58.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.59.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.60.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.61.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.62.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.63.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.64.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.65.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.66.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.67.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.68.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.69.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.70.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.71.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.72.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.73.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.74.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.75.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.76.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.77.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.78.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.79.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.80.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.81.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.82.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.83.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.84.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.85.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.86.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.87.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.88.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.89.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.90.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.91.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.92.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.93.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.94.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.95.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.96.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.97.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.98.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.99.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.100.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.101.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.102.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.103.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.104.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.105.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.106.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.107.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.108.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.109.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.110.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.111.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.112.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.113.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.114.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.115.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.116.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.117.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.118.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.119.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.120.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.121.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.122.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.123.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.124.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.125.w2.weight": "model-00002-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.126.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.5.block_sparse_moe.experts.127.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.64.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.64.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.65.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.65.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.66.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.66.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.67.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.67.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.68.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.68.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.69.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.69.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.70.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.70.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.71.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.71.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.72.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.72.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.73.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.73.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.74.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.74.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.75.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.75.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.76.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.76.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.77.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.77.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.78.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.78.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.79.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.79.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.80.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.80.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.81.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.81.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.82.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.82.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.83.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.83.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.84.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.84.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.85.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.85.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.86.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.86.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.87.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.87.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.88.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.88.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.89.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.89.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.90.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.90.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.91.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.91.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.92.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.92.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.93.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.93.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.94.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.94.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.95.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.95.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.96.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.96.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.97.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.97.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.98.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.98.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.99.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.99.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.100.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.100.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.101.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.101.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.102.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.102.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.103.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.103.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.104.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.104.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.105.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.105.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.106.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.106.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.107.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.107.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.108.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.108.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.109.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.109.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.110.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.110.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.111.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.111.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.112.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.112.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.113.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.113.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.114.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.114.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.115.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.115.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.116.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.116.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.117.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.117.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.118.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.118.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.119.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.119.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.120.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.120.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.121.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.121.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.122.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.122.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.123.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.123.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.124.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.124.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.125.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.125.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.126.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.126.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.127.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.127.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.20.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.21.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.22.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.23.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.32.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.33.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.34.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.35.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.36.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.37.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.38.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.39.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.40.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.41.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.42.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.43.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.44.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.45.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.46.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.47.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.48.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.49.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.50.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.51.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.52.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.53.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.54.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.55.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.56.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.57.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.58.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.59.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.60.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.61.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.62.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.63.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.64.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.65.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.66.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.67.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.68.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.69.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.70.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.71.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.72.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.73.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.74.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.75.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.76.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.77.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.78.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.79.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.80.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.81.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.82.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.83.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.84.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.85.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.86.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.87.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.88.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.89.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.90.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.91.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.92.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.93.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.94.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.95.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.96.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.97.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.98.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.99.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.100.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.101.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.102.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.103.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.104.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.105.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.106.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.107.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.108.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.109.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.110.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.111.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.112.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.113.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.114.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.115.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.116.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.117.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.118.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.119.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.120.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.121.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.122.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.123.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.124.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.125.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.126.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.6.block_sparse_moe.experts.127.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.64.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.64.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.65.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.65.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.66.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.66.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.67.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.67.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.68.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.68.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.69.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.69.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.70.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.70.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.71.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.71.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.72.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.72.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.73.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.73.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.74.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.74.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.75.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.75.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.76.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.76.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.77.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.77.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.78.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.78.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.79.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.79.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.80.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.80.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.81.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.81.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.82.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.82.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.83.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.83.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.84.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.84.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.85.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.85.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.86.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.86.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.87.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.87.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.88.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.88.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.89.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.89.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.90.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.90.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.91.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.91.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.92.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.92.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.93.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.93.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.94.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.94.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.95.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.95.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.96.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.96.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.97.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.97.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.98.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.98.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.99.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.99.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.100.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.100.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.101.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.101.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.102.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.102.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.103.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.103.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.104.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.104.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.105.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.105.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.106.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.106.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.107.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.107.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.108.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.108.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.109.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.109.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.110.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.110.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.111.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.111.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.112.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.112.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.113.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.113.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.114.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.114.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.115.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.115.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.116.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.116.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.117.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.117.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.118.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.118.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.119.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.119.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.120.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.120.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.121.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.121.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.122.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.122.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.123.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.123.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.124.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.124.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.125.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.125.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.126.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.126.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.127.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.127.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.20.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.21.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.22.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.23.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.32.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.33.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.34.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.35.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.36.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.37.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.38.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.39.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.40.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.41.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.42.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.43.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.44.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.45.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.46.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.47.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.48.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.49.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.50.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.51.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.52.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.53.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.54.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.55.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.56.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.57.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.58.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.59.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.60.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.61.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.62.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.63.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.64.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.65.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.66.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.67.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.68.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.69.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.70.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.71.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.72.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.73.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.74.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.75.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.76.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.77.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.78.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.79.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.80.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.81.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.82.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.83.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.84.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.85.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.86.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.87.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.88.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.89.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.90.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.91.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.92.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.93.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.94.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.95.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.96.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.97.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.98.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.99.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.100.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.101.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.102.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.103.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.104.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.105.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.106.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.107.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.108.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.109.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.110.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.111.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.112.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.113.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.114.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.115.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.116.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.117.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.118.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.119.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.120.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.121.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.122.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.123.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.124.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.125.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.126.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.7.block_sparse_moe.experts.127.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.64.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.64.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.65.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.65.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.66.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.66.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.67.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.67.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.68.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.68.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.69.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.69.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.70.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.70.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.71.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.71.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.72.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.72.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.73.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.73.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.74.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.74.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.75.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.75.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.76.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.76.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.77.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.77.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.78.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.78.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.79.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.79.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.80.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.80.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.81.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.81.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.82.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.82.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.83.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.83.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.84.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.84.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.85.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.85.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.86.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.86.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.87.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.87.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.88.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.88.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.89.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.89.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.90.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.90.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.91.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.91.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.92.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.92.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.93.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.93.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.94.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.94.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.95.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.95.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.96.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.96.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.97.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.97.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.98.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.98.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.99.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.99.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.100.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.100.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.101.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.101.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.102.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.102.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.103.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.103.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.104.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.104.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.105.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.105.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.106.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.106.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.107.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.107.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.108.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.108.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.109.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.109.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.110.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.110.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.111.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.111.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.112.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.112.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.113.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.113.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.114.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.114.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.115.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.115.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.116.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.116.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.117.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.117.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.118.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.118.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.119.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.119.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.120.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.120.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.121.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.121.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.122.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.122.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.123.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.123.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.124.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.124.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.125.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.125.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.126.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.126.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.127.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.127.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.16.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.17.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.18.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.19.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.20.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.21.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.22.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.23.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.24.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.25.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.26.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.27.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.28.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.29.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.30.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.31.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.32.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.33.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.34.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.35.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.36.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.37.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.38.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.39.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.40.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.41.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.42.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.43.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.44.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.45.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.46.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.47.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.48.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.49.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.50.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.51.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.52.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.53.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.54.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.55.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.56.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.57.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.58.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.59.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.60.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.61.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.62.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.63.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.64.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.65.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.66.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.67.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.68.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.69.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.70.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.71.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.72.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.73.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.74.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.75.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.76.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.77.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.78.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.79.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.80.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.81.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.82.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.83.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.84.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.85.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.86.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.87.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.88.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.89.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.90.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.91.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.92.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.93.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.94.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.95.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.96.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.97.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.98.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.99.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.100.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.101.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.102.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.103.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.104.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.105.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.106.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.107.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.108.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.109.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.110.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.111.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.112.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.113.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.114.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.115.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.116.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.117.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.118.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.119.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.120.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.121.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.122.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.123.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.124.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.125.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.126.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.8.block_sparse_moe.experts.127.w2.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w1.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w3.weight": "model-00003-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.64.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.64.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.65.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.65.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.66.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.66.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.67.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.67.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.68.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.68.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.69.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.69.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.70.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.70.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.71.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.71.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.72.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.72.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.73.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.73.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.74.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.74.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.75.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.75.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.76.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.76.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.77.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.77.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.78.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.78.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.79.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.79.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.80.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.80.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.81.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.81.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.82.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.82.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.83.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.83.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.84.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.84.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.85.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.85.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.86.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.86.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.87.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.87.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.88.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.88.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.89.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.89.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.90.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.90.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.91.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.91.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.92.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.92.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.93.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.93.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.94.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.94.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.95.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.95.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.96.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.96.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.97.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.97.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.98.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.98.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.99.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.99.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.100.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.100.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.101.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.101.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.102.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.102.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.103.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.103.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.104.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.104.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.105.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.105.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.106.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.106.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.107.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.107.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.108.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.108.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.109.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.109.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.110.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.110.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.111.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.111.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.112.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.112.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.113.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.113.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.114.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.114.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.115.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.115.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.116.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.116.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.117.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.117.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.118.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.118.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.119.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.119.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.120.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.120.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.121.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.121.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.122.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.122.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.123.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.123.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.124.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.124.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.125.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.125.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.126.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.126.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.127.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.127.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.24.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.25.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.26.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.27.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.28.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.29.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.30.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.31.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.32.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.33.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.34.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.35.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.36.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.37.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.38.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.39.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.47.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.48.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.49.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.50.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.51.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.52.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.53.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.54.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.55.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.56.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.57.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.58.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.59.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.60.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.61.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.62.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.63.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.64.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.65.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.66.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.67.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.68.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.69.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.70.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.71.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.72.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.73.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.74.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.75.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.76.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.77.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.78.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.79.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.80.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.81.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.82.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.83.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.84.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.85.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.86.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.87.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.88.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.89.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.90.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.91.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.92.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.93.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.94.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.95.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.96.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.97.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.98.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.99.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.100.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.101.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.102.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.103.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.104.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.105.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.106.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.107.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.108.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.109.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.110.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.111.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.112.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.113.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.114.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.115.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.116.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.117.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.118.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.119.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.120.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.121.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.122.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.123.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.124.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.125.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.126.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.9.block_sparse_moe.experts.127.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.64.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.64.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.65.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.65.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.66.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.66.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.67.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.67.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.68.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.68.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.69.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.69.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.70.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.70.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.71.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.71.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.72.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.72.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.73.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.73.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.74.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.74.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.75.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.75.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.76.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.76.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.77.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.77.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.78.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.78.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.79.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.79.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.80.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.80.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.81.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.81.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.82.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.82.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.83.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.83.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.84.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.84.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.85.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.85.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.86.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.86.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.87.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.87.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.88.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.88.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.89.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.89.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.90.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.90.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.91.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.91.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.92.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.92.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.93.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.93.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.94.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.94.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.95.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.95.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.96.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.96.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.97.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.97.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.98.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.98.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.99.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.99.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.100.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.100.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.101.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.101.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.102.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.102.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.103.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.103.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.104.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.104.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.105.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.105.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.106.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.106.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.107.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.107.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.108.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.108.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.109.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.109.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.110.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.110.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.111.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.111.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.112.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.112.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.113.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.113.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.114.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.114.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.115.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.115.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.116.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.116.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.117.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.117.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.118.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.118.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.119.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.119.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.120.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.120.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.121.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.121.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.122.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.122.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.123.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.123.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.124.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.124.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.125.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.125.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.126.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.126.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.127.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.127.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.24.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.25.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.26.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.27.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.28.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.29.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.30.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.31.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.32.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.33.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.34.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.35.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.36.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.37.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.38.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.39.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.47.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.48.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.49.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.50.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.51.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.52.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.53.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.54.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.55.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.56.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.57.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.58.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.59.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.60.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.61.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.62.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.63.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.64.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.65.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.66.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.67.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.68.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.69.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.70.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.71.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.72.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.73.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.74.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.75.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.76.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.77.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.78.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.79.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.80.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.81.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.82.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.83.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.84.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.85.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.86.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.87.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.88.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.89.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.90.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.91.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.92.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.93.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.94.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.95.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.96.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.97.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.98.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.99.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.100.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.101.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.102.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.103.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.104.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.105.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.106.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.107.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.108.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.109.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.110.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.111.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.112.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.113.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.114.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.115.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.116.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.117.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.118.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.119.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.120.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.121.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.122.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.123.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.124.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.125.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.126.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.10.block_sparse_moe.experts.127.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.64.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.64.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.65.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.65.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.66.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.66.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.67.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.67.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.68.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.68.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.69.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.69.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.70.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.70.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.71.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.71.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.72.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.72.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.73.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.73.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.74.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.74.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.75.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.75.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.76.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.76.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.77.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.77.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.78.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.78.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.79.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.79.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.80.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.80.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.81.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.81.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.82.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.82.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.83.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.83.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.84.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.84.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.85.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.85.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.86.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.86.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.87.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.87.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.88.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.88.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.89.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.89.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.90.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.90.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.91.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.91.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.92.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.92.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.93.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.93.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.94.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.94.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.95.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.95.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.96.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.96.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.97.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.97.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.98.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.98.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.99.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.99.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.100.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.100.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.101.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.101.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.102.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.102.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.103.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.103.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.104.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.104.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.105.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.105.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.106.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.106.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.107.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.107.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.108.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.108.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.109.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.109.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.110.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.110.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.111.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.111.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.112.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.112.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.113.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.113.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.114.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.114.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.115.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.115.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.116.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.116.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.117.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.117.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.118.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.118.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.119.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.119.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.120.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.120.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.121.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.121.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.122.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.122.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.123.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.123.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.124.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.124.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.125.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.125.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.126.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.126.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.127.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.127.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.16.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.17.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.18.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.19.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.20.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.21.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.22.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.23.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.24.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.25.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.26.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.27.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.28.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.29.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.30.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.31.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.32.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.33.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.34.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.35.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.36.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.37.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.38.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.39.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.40.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.41.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.42.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.43.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.44.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.45.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.46.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.47.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.48.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.49.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.50.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.51.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.52.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.53.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.54.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.55.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.56.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.57.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.58.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.59.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.60.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.61.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.62.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.63.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.64.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.65.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.66.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.67.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.68.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.69.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.70.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.71.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.72.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.73.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.74.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.75.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.76.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.77.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.78.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.79.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.80.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.81.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.82.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.83.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.84.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.85.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.86.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.87.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.88.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.89.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.90.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.91.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.92.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.93.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.94.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.95.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.96.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.97.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.98.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.99.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.100.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.101.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.102.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.103.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.104.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.105.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.106.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.107.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.108.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.109.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.110.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.111.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.112.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.113.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.114.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.115.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.116.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.117.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.118.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.119.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.120.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.121.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.122.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.123.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.124.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.125.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.126.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.11.block_sparse_moe.experts.127.w2.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w1.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w3.weight": "model-00004-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.64.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.64.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.65.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.65.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.66.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.66.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.67.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.67.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.68.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.68.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.69.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.69.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.70.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.70.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.71.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.71.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.72.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.72.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.73.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.73.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.74.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.74.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.75.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.75.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.76.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.76.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.77.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.77.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.78.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.78.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.79.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.79.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.80.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.80.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.81.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.81.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.82.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.82.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.83.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.83.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.84.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.84.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.85.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.85.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.86.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.86.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.87.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.87.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.88.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.88.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.89.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.89.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.90.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.90.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.91.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.91.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.92.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.92.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.93.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.93.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.94.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.94.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.95.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.95.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.96.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.96.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.97.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.97.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.98.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.98.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.99.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.99.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.100.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.100.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.101.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.101.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.102.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.102.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.103.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.103.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.104.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.104.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.105.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.105.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.106.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.106.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.107.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.107.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.108.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.108.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.109.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.109.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.110.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.110.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.111.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.111.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.112.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.112.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.113.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.113.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.114.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.114.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.115.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.115.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.116.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.116.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.117.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.117.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.118.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.118.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.119.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.119.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.120.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.120.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.121.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.121.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.122.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.122.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.123.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.123.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.124.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.124.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.125.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.125.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.126.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.126.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.127.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.127.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.16.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.17.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.18.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.19.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.20.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.21.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.22.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.23.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.32.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.33.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.34.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.35.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.36.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.37.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.38.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.39.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.40.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.41.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.42.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.43.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.44.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.45.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.46.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.47.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.56.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.57.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.58.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.59.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.60.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.61.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.62.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.63.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.64.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.65.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.66.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.67.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.68.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.69.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.70.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.71.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.72.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.73.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.74.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.75.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.76.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.77.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.78.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.79.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.80.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.81.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.82.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.83.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.84.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.85.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.86.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.87.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.88.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.89.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.90.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.91.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.92.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.93.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.94.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.95.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.96.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.97.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.98.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.99.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.100.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.101.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.102.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.103.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.104.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.105.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.106.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.107.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.108.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.109.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.110.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.111.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.112.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.113.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.114.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.115.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.116.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.117.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.118.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.119.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.120.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.121.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.122.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.123.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.124.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.125.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.126.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.12.block_sparse_moe.experts.127.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.64.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.64.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.65.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.65.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.66.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.66.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.67.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.67.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.68.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.68.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.69.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.69.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.70.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.70.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.71.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.71.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.72.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.72.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.73.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.73.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.74.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.74.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.75.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.75.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.76.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.76.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.77.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.77.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.78.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.78.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.79.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.79.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.80.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.80.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.81.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.81.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.82.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.82.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.83.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.83.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.84.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.84.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.85.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.85.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.86.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.86.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.87.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.87.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.88.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.88.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.89.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.89.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.90.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.90.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.91.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.91.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.92.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.92.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.93.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.93.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.94.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.94.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.95.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.95.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.96.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.96.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.97.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.97.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.98.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.98.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.99.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.99.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.100.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.100.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.101.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.101.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.102.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.102.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.103.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.103.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.104.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.104.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.105.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.105.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.106.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.106.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.107.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.107.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.108.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.108.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.109.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.109.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.110.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.110.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.111.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.111.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.112.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.112.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.113.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.113.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.114.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.114.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.115.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.115.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.116.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.116.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.117.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.117.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.118.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.118.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.119.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.119.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.120.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.120.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.121.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.121.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.122.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.122.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.123.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.123.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.124.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.124.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.125.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.125.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.126.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.126.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.127.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.127.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.16.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.17.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.18.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.19.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.20.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.21.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.22.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.23.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.32.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.33.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.34.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.35.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.36.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.37.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.38.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.39.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.40.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.41.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.42.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.43.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.44.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.45.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.46.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.47.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.56.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.57.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.58.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.59.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.60.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.61.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.62.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.63.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.64.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.65.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.66.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.67.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.68.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.69.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.70.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.71.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.72.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.73.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.74.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.75.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.76.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.77.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.78.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.79.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.80.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.81.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.82.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.83.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.84.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.85.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.86.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.87.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.88.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.89.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.90.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.91.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.92.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.93.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.94.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.95.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.96.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.97.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.98.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.99.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.100.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.101.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.102.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.103.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.104.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.105.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.106.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.107.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.108.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.109.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.110.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.111.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.112.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.113.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.114.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.115.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.116.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.117.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.118.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.119.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.120.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.121.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.122.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.123.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.124.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.125.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.126.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.13.block_sparse_moe.experts.127.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.64.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.64.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.65.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.65.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.66.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.66.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.67.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.67.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.68.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.68.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.69.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.69.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.70.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.70.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.71.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.71.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.72.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.72.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.73.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.73.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.74.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.74.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.75.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.75.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.76.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.76.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.77.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.77.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.78.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.78.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.79.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.79.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.80.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.80.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.81.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.81.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.82.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.82.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.83.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.83.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.84.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.84.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.85.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.85.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.86.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.86.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.87.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.87.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.88.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.88.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.89.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.89.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.90.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.90.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.91.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.91.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.92.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.92.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.93.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.93.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.94.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.94.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.95.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.95.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.96.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.96.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.97.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.97.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.98.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.98.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.99.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.99.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.100.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.100.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.101.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.101.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.102.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.102.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.103.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.103.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.104.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.104.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.105.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.105.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.106.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.106.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.107.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.107.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.108.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.108.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.109.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.109.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.110.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.110.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.111.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.111.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.112.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.112.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.113.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.113.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.114.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.114.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.115.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.115.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.116.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.116.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.117.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.117.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.118.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.118.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.119.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.119.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.120.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.120.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.121.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.121.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.122.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.122.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.123.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.123.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.124.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.124.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.125.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.125.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.126.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.126.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.127.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.127.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.16.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.17.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.18.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.19.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.20.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.21.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.22.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.23.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.24.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.25.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.26.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.27.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.28.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.29.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.30.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.31.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.32.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.33.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.34.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.35.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.36.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.37.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.38.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.39.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.40.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.41.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.42.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.43.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.44.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.45.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.46.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.47.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.48.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.49.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.50.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.51.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.52.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.53.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.54.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.55.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.56.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.57.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.58.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.59.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.60.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.61.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.62.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.63.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.64.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.65.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.66.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.67.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.68.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.69.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.70.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.71.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.72.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.73.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.74.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.75.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.76.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.77.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.78.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.79.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.80.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.81.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.82.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.83.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.84.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.85.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.86.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.87.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.88.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.89.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.90.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.91.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.92.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.93.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.94.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.95.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.96.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.97.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.98.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.99.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.100.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.101.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.102.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.103.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.104.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.105.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.106.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.107.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.108.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.109.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.110.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.111.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.112.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.113.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.114.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.115.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.116.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.117.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.118.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.119.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.120.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.121.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.122.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.123.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.124.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.125.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.126.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.14.block_sparse_moe.experts.127.w2.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w1.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w3.weight": "model-00005-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.64.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.64.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.65.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.65.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.66.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.66.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.67.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.67.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.68.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.68.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.69.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.69.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.70.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.70.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.71.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.71.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.72.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.72.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.73.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.73.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.74.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.74.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.75.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.75.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.76.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.76.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.77.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.77.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.78.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.78.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.79.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.79.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.80.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.80.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.81.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.81.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.82.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.82.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.83.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.83.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.84.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.84.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.85.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.85.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.86.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.86.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.87.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.87.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.88.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.88.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.89.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.89.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.90.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.90.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.91.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.91.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.92.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.92.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.93.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.93.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.94.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.94.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.95.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.95.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.96.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.96.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.97.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.97.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.98.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.98.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.99.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.99.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.100.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.100.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.101.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.101.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.102.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.102.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.103.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.103.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.104.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.104.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.105.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.105.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.106.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.106.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.107.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.107.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.108.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.108.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.109.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.109.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.110.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.110.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.111.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.111.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.112.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.112.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.113.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.113.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.114.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.114.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.115.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.115.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.116.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.116.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.117.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.117.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.118.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.118.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.119.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.119.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.120.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.120.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.121.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.121.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.122.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.122.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.123.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.123.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.124.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.124.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.125.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.125.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.126.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.126.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.127.w1.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.127.w3.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.16.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.17.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.18.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.19.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.20.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.21.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.22.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.23.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.24.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.25.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.26.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.27.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.28.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.29.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.30.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.31.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.32.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.33.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.34.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.35.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.36.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.37.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.38.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.39.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.40.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.41.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.42.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.43.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.44.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.45.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.46.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.47.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.48.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.49.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.50.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.51.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.52.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.53.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.54.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.55.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.56.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.57.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.58.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.59.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.60.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.61.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.62.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.63.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.64.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.65.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.66.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.67.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.68.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.69.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.70.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.71.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.72.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.73.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.74.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.75.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.76.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.77.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.78.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.79.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.80.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.81.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.82.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.83.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.84.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.85.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.86.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.87.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.88.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.89.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.90.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.91.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.92.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.93.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.94.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.95.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.96.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.97.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.98.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.99.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.100.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.101.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.102.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.103.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.104.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.105.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.106.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.107.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.108.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.109.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.110.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.111.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.112.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.113.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.114.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.115.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.116.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.117.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.118.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.119.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.120.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.121.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.122.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.123.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.124.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.125.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.126.w2.weight": "model-00006-of-00006.safetensors", + "model.layers.15.block_sparse_moe.experts.127.w2.weight": "model-00006-of-00006.safetensors", + "model.norm.weight": "model-00006-of-00006.safetensors", + "lm_head.weight": "model-00006-of-00006.safetensors" + } +} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..8644c8f --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,10 @@ +{ + "bos_token": "", + "cls_token": "", + "eod_token": "", + "eos_token": "", + "mask_token": "", + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..fc80107 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955dc1fa623fab38cc92a3f4ee172423ae6d73201c4207569bfdf5626bc733f0 +size 6416433 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..09aa857 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,18 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "unk_token": "", + "bos_token": "", + "eos_token": "", + "pad_token": "", + "cls_token": "", + "sep_token": "", + "eod_token": "", + "mask_token": "", + "extra_ids": 0, + "sp_model_kwargs": {}, + "model_max_length": 1000000000000000019884624838656, + "clean_up_tokenization_spaces": false, + "special_tokens_map_file": null, + "tokenizer_class": "PreTrainedTokenizerFast" +}