From c2013d96d1976bb4d400be3302fd68dc6d5bc5ce Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 2 Jun 2026 23:06:15 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: maywell/Mistral-ko-7B-v0.1 Source: Original Platform --- .gitattributes | 57 ++++++ README.md | 29 +++ config.json | 35 ++++ configuration.json | 1 + generation_config.json | 7 + model-00001-of-00004.safetensors | 3 + model-00002-of-00004.safetensors | 3 + model-00003-of-00004.safetensors | 3 + model-00004-of-00004.safetensors | 3 + model.safetensors.index.json | 298 +++++++++++++++++++++++++++++++ pytorch_model-00001-of-00004.bin | 3 + pytorch_model-00002-of-00004.bin | 3 + pytorch_model-00003-of-00004.bin | 3 + pytorch_model-00004-of-00004.bin | 3 + pytorch_model.bin.index.json | 298 +++++++++++++++++++++++++++++++ special_tokens_map.json | 35 ++++ tokenizer.json | 3 + tokenizer_config.json | 53 ++++++ 18 files changed, 840 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 configuration.json create mode 100644 generation_config.json create mode 100644 model-00001-of-00004.safetensors create mode 100644 model-00002-of-00004.safetensors create mode 100644 model-00003-of-00004.safetensors create mode 100644 model-00004-of-00004.safetensors create mode 100644 model.safetensors.index.json create mode 100644 pytorch_model-00001-of-00004.bin create mode 100644 pytorch_model-00002-of-00004.bin create mode 100644 pytorch_model-00003-of-00004.bin create mode 100644 pytorch_model-00004-of-00004.bin create mode 100644 pytorch_model.bin.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..492915e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,57 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text + +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +pytorch_model-00001-of-00004.bin filter=lfs diff=lfs merge=lfs -text +pytorch_model-00002-of-00004.bin filter=lfs diff=lfs merge=lfs -text +model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +pytorch_model-00004-of-00004.bin filter=lfs diff=lfs merge=lfs -text +pytorch_model-00003-of-00004.bin filter=lfs diff=lfs merge=lfs -text +model-00004-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..281b9c7 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +--- +license: cc-by-nc-4.0 +language: +- ko +pipeline_tag: text-generation +--- + +# 해당 모델은 오래된 실험용입니다. 실 사용을 권장하지 않습니다. + +# Mistral-ko-7B-v0.1 + +# **Model Details** + +### Description +Mistral-ko-7B-v0.1는 미스트랄에 한국어에 최적화 된 토크나이저를 적용한 모델입니다. Raw Data로 어느정도 형성된 모델에 시나트라에 사용 된 데이터셋으로 2 Epoch 훈련되었습니다. + +-- Further Description After Evaluation -- + +## Comment + +토크나이저는 @beomi님의 라마2 한국어 버전을 기반으로 제작되었습니다. + +기반 모델을 제공해주신 @jin05102518님께 감사드립니다. + +Follow me on twitter: https://twitter.com/stablefluffy + +Consider Support me making these model alone: https://www.buymeacoffee.com/mwell or with Runpod Credit Gift 💕 + +Contact me on Telegram: https://t.me/AlzarTakkarsen \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..4689cca --- /dev/null +++ b/config.json @@ -0,0 +1,35 @@ +{ + "_name_or_path": "maywell/Mistral-ko-7B-v0.1", + "architectures": [ + "MistralForCausalLM" + ], + "bos_token_id": 1, + "c_max_position_embeddings": 4096, + "eos_token_id": 46080, + "flash_attn_key_chunk_size": 1024, + "flash_attn_query_chunk_size": 1024, + "freq_max_position_embeddings": 32768, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "model_type": "mistral", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "number_rep_kv": 1, + "pad_token_id": 2, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "scan_mlp_chunk_size": 1024, + "sliding_window": 4096, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.35.1", + "use_cache": false, + "use_flash_attention": false, + "use_pjit_attention_force": true, + "use_sacn_mlp": false, + "vocab_size": 46081 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..2acf645 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 2, + "transformers_version": "4.35.1" +} diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..4069241 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f11cb1c665f2da8d0fa07f53b9e286e4357bf0dd0a816127a655585ef71253 +size 4999931816 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..608c032 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd1c6c9c08e78db890284d929f989c59b56d68c206904dc1dab1df3600c4247b +size 4966461192 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..f4757ef --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc869594d1f96a91b99ef6979b168b99b3f3b1b2205a1bb1dcb71efd1dcc13b +size 4748340048 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..ae0c42e --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d41b42090932d10f8e3ac0c5672feb71c57e65501ef57ae3369d8c512dff5c +size 754991232 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..b971f68 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 15469690880 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors" + } +} \ No newline at end of file diff --git a/pytorch_model-00001-of-00004.bin b/pytorch_model-00001-of-00004.bin new file mode 100644 index 0000000..dad2631 --- /dev/null +++ b/pytorch_model-00001-of-00004.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b9903c38a03af9ea6dd2c3df91fa46f633506cb53b1e5eacf5b5a6fc3fe3b8d +size 4999952571 diff --git a/pytorch_model-00002-of-00004.bin b/pytorch_model-00002-of-00004.bin new file mode 100644 index 0000000..3be03fd --- /dev/null +++ b/pytorch_model-00002-of-00004.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cceb3bb5f3127277e047587ee9264103b1fc8a27d5d5e0a609329ff6e316ad9 +size 4966486094 diff --git a/pytorch_model-00003-of-00004.bin b/pytorch_model-00003-of-00004.bin new file mode 100644 index 0000000..06726ba --- /dev/null +++ b/pytorch_model-00003-of-00004.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7300036bb6b13c6db3dd1f9f828a0aa00af8ffabec4ecb4bd6c4ffeb7aeb0728 +size 4748363053 diff --git a/pytorch_model-00004-of-00004.bin b/pytorch_model-00004-of-00004.bin new file mode 100644 index 0000000..ee8f4f3 --- /dev/null +++ b/pytorch_model-00004-of-00004.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9ede41c1b0257eb608552796abbddd2e88ac7baaacbdc13eb012e0668229537 +size 754992042 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000..e6e4aea --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 15469690880 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00004-of-00004.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin", + "model.norm.weight": "pytorch_model-00003-of-00004.bin" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..eea7942 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,35 @@ +{ + "additional_special_tokens": [ + "", + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..2e6a9d6 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae3f60002872e19570f698f3a991e984fa2ee4860684361bca51123efd10f759 +size 2495819 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..975c281 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,53 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46080": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "truncation_side": "left", + "unk_token": "", + "use_default_system_prompt": true +}