diff --git a/README.md b/README.md index e70a571..154df82 100644 --- a/README.md +++ b/README.md @@ -1,51 +1,3 @@ --- -frameworks: -- Pytorch -license: Apache License 2.0 -tasks: -- text-generation - -#model-type: -##如 gpt、phi、llama、chatglm、baichuan 等 -#- gpt - -#domain: -##如 nlp、cv、audio、multi-modal -#- nlp - -#language: -##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa -#- cn - -#metrics: -##如 CIDEr、Blue、ROUGE 等 -#- CIDEr - -#tags: -##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他 -#- pretrained - -#tools: -##如 vllm、fastchat、llamacpp、AdaSeq 等 -#- vllm +license: apache-2.0 --- -### 当前模型的贡献者未提供更加详细的模型介绍。模型文件和权重,可浏览“模型文件”页面获取。 -#### 您可以通过如下git clone命令,或者ModelScope SDK来下载模型 - -SDK下载 -```bash -#安装ModelScope -pip install modelscope -``` -```python -#SDK模型下载 -from modelscope import snapshot_download -model_dir = snapshot_download('PAI/DistilQwen2.5-DS3-0324-7B') -``` -Git下载 -``` -#Git模型下载 -git clone https://www.modelscope.cn/PAI/DistilQwen2.5-DS3-0324-7B.git -``` - -

如果您是本模型的贡献者,我们邀请您根据模型贡献文档,及时完善模型卡片内容。

\ No newline at end of file diff --git a/config.json b/config.json index 5c32c35..5d770b5 100644 --- a/config.json +++ b/config.json @@ -1,7 +1,7 @@ { - "_name_or_path": "/mnt/data/models/Qwen2___5-7B-Instruct", + "_name_or_path": "/mnt/workspace/DistilQwen25-DS3-0324/7B", "architectures": [ - "Qwen2ForCausalLM" + "Qwen2Model" ], "attention_dropout": 0.0, "bos_token_id": 151643, @@ -21,8 +21,8 @@ "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, - "torch_dtype": "bfloat16", - "transformers_version": "4.46.1", + "torch_dtype": "float32", + "transformers_version": "4.48.3", "use_cache": false, "use_sliding_window": false, "vocab_size": 152064 diff --git a/model-00001-of-00006.safetensors b/model-00001-of-00006.safetensors new file mode 100644 index 0000000..064d87f --- /dev/null +++ b/model-00001-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcadb38a84e9a2543c4b0cd68831a9bb3187596978037ea6060fa61a80915bbe +size 135 diff --git a/model-00002-of-00006.safetensors b/model-00002-of-00006.safetensors new file mode 100644 index 0000000..0ef0b3d --- /dev/null +++ b/model-00002-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5bdd9a2f8f6a37fa9ac51de2c62063010215dc19d03a51a6037617c71c6f5a +size 135 diff --git a/model-00003-of-00006.safetensors b/model-00003-of-00006.safetensors new file mode 100644 index 0000000..c358645 --- /dev/null +++ b/model-00003-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19397e19da02ec48dad3736e1251263c9d1ecf00d8c56fd6d1746178f24d8079 +size 135 diff --git a/model-00004-of-00006.safetensors b/model-00004-of-00006.safetensors new file mode 100644 index 0000000..a93ef46 --- /dev/null +++ b/model-00004-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e9bae7167667fb4b26c35e4b5b3d313208d8e2f5b356b4df5130d81f846f61 +size 135 diff --git a/model-00005-of-00006.safetensors b/model-00005-of-00006.safetensors new file mode 100644 index 0000000..95c1994 --- /dev/null +++ b/model-00005-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e226e184cc3d4bf9b29bd72cb7129e5e8d1b9ab61a20ad9d24302ec369fa37e +size 135 diff --git a/model-00006-of-00006.safetensors b/model-00006-of-00006.safetensors new file mode 100644 index 0000000..8b8bd4e --- /dev/null +++ b/model-00006-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4588b1a2ad133e3963a53d120640ff282b62cc891b6244ac3936cffa3310cb68 +size 135 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index 6ca5084..8808252 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,346 +1,345 @@ { "metadata": { - "total_size": 15231233024 + "total_size": 28282476544 }, "weight_map": { - "lm_head.weight": "model-00004-of-00004.safetensors", - "model.embed_tokens.weight": "model-00001-of-00004.safetensors", - "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", - "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", - "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", - "model.norm.weight": "model-00003-of-00004.safetensors" + "embed_tokens.weight": "model-00001-of-00006.safetensors", + "layers.0.input_layernorm.weight": "model-00001-of-00006.safetensors", + "layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "layers.0.self_attn.k_proj.bias": "model-00001-of-00006.safetensors", + "layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "layers.0.self_attn.q_proj.bias": "model-00001-of-00006.safetensors", + "layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "layers.0.self_attn.v_proj.bias": "model-00001-of-00006.safetensors", + "layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors", + "layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "layers.1.self_attn.k_proj.bias": "model-00001-of-00006.safetensors", + "layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "layers.1.self_attn.q_proj.bias": "model-00001-of-00006.safetensors", + "layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "layers.1.self_attn.v_proj.bias": "model-00001-of-00006.safetensors", + "layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "layers.10.input_layernorm.weight": "model-00003-of-00006.safetensors", + "layers.10.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "layers.10.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "layers.10.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "layers.10.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "layers.10.self_attn.k_proj.bias": "model-00003-of-00006.safetensors", + "layers.10.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "layers.10.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "layers.10.self_attn.q_proj.bias": "model-00003-of-00006.safetensors", + "layers.10.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "layers.10.self_attn.v_proj.bias": "model-00003-of-00006.safetensors", + "layers.10.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "layers.11.input_layernorm.weight": "model-00003-of-00006.safetensors", + "layers.11.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "layers.11.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "layers.11.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "layers.11.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "layers.11.self_attn.k_proj.bias": "model-00003-of-00006.safetensors", + "layers.11.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "layers.11.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "layers.11.self_attn.q_proj.bias": "model-00003-of-00006.safetensors", + "layers.11.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "layers.11.self_attn.v_proj.bias": "model-00003-of-00006.safetensors", + "layers.11.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "layers.12.input_layernorm.weight": "model-00003-of-00006.safetensors", + "layers.12.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "layers.12.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "layers.12.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "layers.12.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "layers.12.self_attn.k_proj.bias": "model-00003-of-00006.safetensors", + "layers.12.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "layers.12.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "layers.12.self_attn.q_proj.bias": "model-00003-of-00006.safetensors", + "layers.12.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "layers.12.self_attn.v_proj.bias": "model-00003-of-00006.safetensors", + "layers.12.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "layers.13.input_layernorm.weight": "model-00004-of-00006.safetensors", + "layers.13.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "layers.13.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "layers.13.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "layers.13.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "layers.13.self_attn.k_proj.bias": "model-00003-of-00006.safetensors", + "layers.13.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "layers.13.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "layers.13.self_attn.q_proj.bias": "model-00003-of-00006.safetensors", + "layers.13.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "layers.13.self_attn.v_proj.bias": "model-00003-of-00006.safetensors", + "layers.13.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "layers.14.input_layernorm.weight": "model-00004-of-00006.safetensors", + "layers.14.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "layers.14.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "layers.14.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "layers.14.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "layers.14.self_attn.k_proj.bias": "model-00004-of-00006.safetensors", + "layers.14.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "layers.14.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "layers.14.self_attn.q_proj.bias": "model-00004-of-00006.safetensors", + "layers.14.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "layers.14.self_attn.v_proj.bias": "model-00004-of-00006.safetensors", + "layers.14.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "layers.15.input_layernorm.weight": "model-00004-of-00006.safetensors", + "layers.15.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "layers.15.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "layers.15.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "layers.15.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "layers.15.self_attn.k_proj.bias": "model-00004-of-00006.safetensors", + "layers.15.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "layers.15.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "layers.15.self_attn.q_proj.bias": "model-00004-of-00006.safetensors", + "layers.15.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "layers.15.self_attn.v_proj.bias": "model-00004-of-00006.safetensors", + "layers.15.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "layers.16.input_layernorm.weight": "model-00004-of-00006.safetensors", + "layers.16.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "layers.16.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "layers.16.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "layers.16.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "layers.16.self_attn.k_proj.bias": "model-00004-of-00006.safetensors", + "layers.16.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "layers.16.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "layers.16.self_attn.q_proj.bias": "model-00004-of-00006.safetensors", + "layers.16.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "layers.16.self_attn.v_proj.bias": "model-00004-of-00006.safetensors", + "layers.16.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "layers.17.input_layernorm.weight": "model-00004-of-00006.safetensors", + "layers.17.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "layers.17.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "layers.17.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "layers.17.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "layers.17.self_attn.k_proj.bias": "model-00004-of-00006.safetensors", + "layers.17.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "layers.17.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "layers.17.self_attn.q_proj.bias": "model-00004-of-00006.safetensors", + "layers.17.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "layers.17.self_attn.v_proj.bias": "model-00004-of-00006.safetensors", + "layers.17.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "layers.18.input_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.18.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "layers.18.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "layers.18.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "layers.18.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.18.self_attn.k_proj.bias": "model-00004-of-00006.safetensors", + "layers.18.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "layers.18.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "layers.18.self_attn.q_proj.bias": "model-00004-of-00006.safetensors", + "layers.18.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "layers.18.self_attn.v_proj.bias": "model-00004-of-00006.safetensors", + "layers.18.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "layers.19.input_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.19.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "layers.19.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "layers.19.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "layers.19.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.19.self_attn.k_proj.bias": "model-00005-of-00006.safetensors", + "layers.19.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "layers.19.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "layers.19.self_attn.q_proj.bias": "model-00005-of-00006.safetensors", + "layers.19.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "layers.19.self_attn.v_proj.bias": "model-00005-of-00006.safetensors", + "layers.19.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors", + "layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "layers.2.self_attn.k_proj.bias": "model-00001-of-00006.safetensors", + "layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "layers.2.self_attn.q_proj.bias": "model-00001-of-00006.safetensors", + "layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "layers.2.self_attn.v_proj.bias": "model-00001-of-00006.safetensors", + "layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "layers.20.input_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.20.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "layers.20.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "layers.20.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "layers.20.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.20.self_attn.k_proj.bias": "model-00005-of-00006.safetensors", + "layers.20.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "layers.20.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "layers.20.self_attn.q_proj.bias": "model-00005-of-00006.safetensors", + "layers.20.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "layers.20.self_attn.v_proj.bias": "model-00005-of-00006.safetensors", + "layers.20.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "layers.21.input_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.21.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "layers.21.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "layers.21.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "layers.21.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.21.self_attn.k_proj.bias": "model-00005-of-00006.safetensors", + "layers.21.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "layers.21.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "layers.21.self_attn.q_proj.bias": "model-00005-of-00006.safetensors", + "layers.21.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "layers.21.self_attn.v_proj.bias": "model-00005-of-00006.safetensors", + "layers.21.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "layers.22.input_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.22.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "layers.22.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "layers.22.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "layers.22.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.22.self_attn.k_proj.bias": "model-00005-of-00006.safetensors", + "layers.22.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "layers.22.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "layers.22.self_attn.q_proj.bias": "model-00005-of-00006.safetensors", + "layers.22.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "layers.22.self_attn.v_proj.bias": "model-00005-of-00006.safetensors", + "layers.22.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "layers.23.input_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.23.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "layers.23.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "layers.23.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "layers.23.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "layers.23.self_attn.k_proj.bias": "model-00005-of-00006.safetensors", + "layers.23.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "layers.23.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "layers.23.self_attn.q_proj.bias": "model-00005-of-00006.safetensors", + "layers.23.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "layers.23.self_attn.v_proj.bias": "model-00005-of-00006.safetensors", + "layers.23.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "layers.24.input_layernorm.weight": "model-00006-of-00006.safetensors", + "layers.24.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "layers.24.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "layers.24.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "layers.24.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "layers.24.self_attn.k_proj.bias": "model-00005-of-00006.safetensors", + "layers.24.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "layers.24.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", + "layers.24.self_attn.q_proj.bias": "model-00005-of-00006.safetensors", + "layers.24.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "layers.24.self_attn.v_proj.bias": "model-00005-of-00006.safetensors", + "layers.24.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "layers.25.input_layernorm.weight": "model-00006-of-00006.safetensors", + "layers.25.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "layers.25.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "layers.25.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "layers.25.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "layers.25.self_attn.k_proj.bias": "model-00006-of-00006.safetensors", + "layers.25.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", + "layers.25.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", + "layers.25.self_attn.q_proj.bias": "model-00006-of-00006.safetensors", + "layers.25.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", + "layers.25.self_attn.v_proj.bias": "model-00006-of-00006.safetensors", + "layers.25.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", + "layers.26.input_layernorm.weight": "model-00006-of-00006.safetensors", + "layers.26.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "layers.26.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "layers.26.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "layers.26.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "layers.26.self_attn.k_proj.bias": "model-00006-of-00006.safetensors", + "layers.26.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", + "layers.26.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", + "layers.26.self_attn.q_proj.bias": "model-00006-of-00006.safetensors", + "layers.26.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", + "layers.26.self_attn.v_proj.bias": "model-00006-of-00006.safetensors", + "layers.26.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", + "layers.27.input_layernorm.weight": "model-00006-of-00006.safetensors", + "layers.27.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "layers.27.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "layers.27.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "layers.27.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "layers.27.self_attn.k_proj.bias": "model-00006-of-00006.safetensors", + "layers.27.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", + "layers.27.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", + "layers.27.self_attn.q_proj.bias": "model-00006-of-00006.safetensors", + "layers.27.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", + "layers.27.self_attn.v_proj.bias": "model-00006-of-00006.safetensors", + "layers.27.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", + "layers.3.input_layernorm.weight": "model-00002-of-00006.safetensors", + "layers.3.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "layers.3.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "layers.3.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "layers.3.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "layers.3.self_attn.k_proj.bias": "model-00002-of-00006.safetensors", + "layers.3.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "layers.3.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "layers.3.self_attn.q_proj.bias": "model-00002-of-00006.safetensors", + "layers.3.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "layers.3.self_attn.v_proj.bias": "model-00002-of-00006.safetensors", + "layers.3.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "layers.4.input_layernorm.weight": "model-00002-of-00006.safetensors", + "layers.4.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "layers.4.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "layers.4.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "layers.4.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "layers.4.self_attn.k_proj.bias": "model-00002-of-00006.safetensors", + "layers.4.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "layers.4.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "layers.4.self_attn.q_proj.bias": "model-00002-of-00006.safetensors", + "layers.4.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "layers.4.self_attn.v_proj.bias": "model-00002-of-00006.safetensors", + "layers.4.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "layers.5.input_layernorm.weight": "model-00002-of-00006.safetensors", + "layers.5.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "layers.5.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "layers.5.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "layers.5.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "layers.5.self_attn.k_proj.bias": "model-00002-of-00006.safetensors", + "layers.5.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "layers.5.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "layers.5.self_attn.q_proj.bias": "model-00002-of-00006.safetensors", + "layers.5.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "layers.5.self_attn.v_proj.bias": "model-00002-of-00006.safetensors", + "layers.5.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors", + "layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "layers.6.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "layers.6.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "layers.6.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "layers.6.self_attn.k_proj.bias": "model-00002-of-00006.safetensors", + "layers.6.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "layers.6.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "layers.6.self_attn.q_proj.bias": "model-00002-of-00006.safetensors", + "layers.6.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "layers.6.self_attn.v_proj.bias": "model-00002-of-00006.safetensors", + "layers.6.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors", + "layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "layers.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "layers.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "layers.7.self_attn.k_proj.bias": "model-00002-of-00006.safetensors", + "layers.7.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "layers.7.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "layers.7.self_attn.q_proj.bias": "model-00002-of-00006.safetensors", + "layers.7.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "layers.7.self_attn.v_proj.bias": "model-00002-of-00006.safetensors", + "layers.7.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "layers.8.input_layernorm.weight": "model-00003-of-00006.safetensors", + "layers.8.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "layers.8.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "layers.8.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "layers.8.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "layers.8.self_attn.k_proj.bias": "model-00002-of-00006.safetensors", + "layers.8.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "layers.8.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "layers.8.self_attn.q_proj.bias": "model-00002-of-00006.safetensors", + "layers.8.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "layers.8.self_attn.v_proj.bias": "model-00002-of-00006.safetensors", + "layers.8.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "layers.9.input_layernorm.weight": "model-00003-of-00006.safetensors", + "layers.9.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "layers.9.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "layers.9.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "layers.9.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "layers.9.self_attn.k_proj.bias": "model-00003-of-00006.safetensors", + "layers.9.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "layers.9.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "layers.9.self_attn.q_proj.bias": "model-00003-of-00006.safetensors", + "layers.9.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "layers.9.self_attn.v_proj.bias": "model-00003-of-00006.safetensors", + "layers.9.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "norm.weight": "model-00006-of-00006.safetensors" } } diff --git a/tokenizer_config.json b/tokenizer_config.json index 3b82aee..78c0d7b 100644 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -199,6 +199,7 @@ "clean_up_tokenization_spaces": false, "eos_token": "<|im_end|>", "errors": "replace", + "extra_special_tokens": {}, "model_max_length": 131072, "pad_token": "<|endoftext|>", "padding_side": "right",