From 8d1d5a8d7d5b720e19d817aa09981c97c60dfd61 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Mon, 25 May 2026 05:29:15 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: uukuguy/speechless-hermes-coig-lite-13b Source: Original Platform --- .gitattributes | 38 +++ README.md | 181 ++++++++++++++ added_tokens.json | 3 + config.json | 26 ++ generation_config.json | 9 + model-00001-of-00003.safetensors | 3 + model-00002-of-00003.safetensors | 3 + model-00003-of-00003.safetensors | 3 + model.safetensors.index.json | 410 +++++++++++++++++++++++++++++++ pytorch_model-00001-of-00003.bin | 3 + pytorch_model-00002-of-00003.bin | 3 + pytorch_model-00003-of-00003.bin | 3 + pytorch_model.bin.index.json | 410 +++++++++++++++++++++++++++++++ special_tokens_map.json | 24 ++ tokenizer.model | 3 + tokenizer_config.json | 34 +++ 16 files changed, 1156 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 added_tokens.json create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model-00001-of-00003.safetensors create mode 100644 model-00002-of-00003.safetensors create mode 100644 model-00003-of-00003.safetensors create mode 100644 model.safetensors.index.json create mode 100644 pytorch_model-00001-of-00003.bin create mode 100644 pytorch_model-00002-of-00003.bin create mode 100644 pytorch_model-00003-of-00003.bin create mode 100644 pytorch_model.bin.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..48997bc --- /dev/null +++ b/.gitattributes @@ -0,0 +1,38 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow 
filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +model-00001-of-00003.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-00003.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-00003.safetensors filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..7fc920a --- /dev/null +++ b/README.md @@ -0,0 +1,181 @@ + +--- +language: +- en +tags: +- llama-2 +- self-instruct +- distillation +- synthetic instruction +license: +- mit +--- + +# Model Card: 
speechless-hermes-coig-lite-13b + +This model is Nous-Hermes-Llama2-13b fine-tuned on COIG-PC-LITE to improve its Chinese capability. + + +# Model Card: Nous-Hermes-Llama2-13b + +Compute provided by our project sponsor Redmond AI, thank you! Follow RedmondAI on Twitter @RedmondAI. + +## Model Description + +Nous-Hermes-Llama2-13b is a state-of-the-art language model fine-tuned on over 300,000 instructions. This model was fine-tuned by Nous Research, with Teknium and Emozilla leading the fine-tuning process and dataset curation, Redmond AI sponsoring the compute, and several other contributors. + +This Hermes model uses the exact same dataset as Hermes on Llama-1. This is to ensure consistency between the old Hermes and the new one, for anyone who wants to keep Hermes as similar as possible to the old one, just more capable. + +This model stands out for its long responses, lower hallucination rate, and absence of OpenAI censorship mechanisms. The fine-tuning process was performed with a 4096 sequence length on an 8x A100 80GB DGX machine. + +## Example Outputs: +![Example4](https://huggingface.co/NousResearch/Nous-Hermes-Llama2-13b/resolve/main/example5.png "Example 4") +![Example1](https://huggingface.co/NousResearch/Nous-Hermes-Llama2-13b/resolve/main/Example1.png "Example 1") +![Example2](https://huggingface.co/NousResearch/Nous-Hermes-Llama2-13b/resolve/main/example2.png "Example 2") +![Example3](https://huggingface.co/NousResearch/Nous-Hermes-Llama2-13b/resolve/main/example3.png "Example 3") + +## Model Training + +The model was trained almost entirely on synthetic GPT-4 outputs. Curating high-quality GPT-4 datasets enables incredibly high quality in knowledge, task completion, and style. 
+ +This includes data from diverse sources such as GPTeacher (the general, roleplay v1 & v2, and code instruct datasets), Nous Instruct & PDACTL (unpublished), and several others, detailed further below. + +## Collaborators +The model fine-tuning and the datasets were a collaboration of effort and resources among Teknium, Karan4D, Emozilla, Huemin Art, and Redmond AI. + +Special mention goes to @winglian for assisting with some of the training issues. + +A huge shoutout and acknowledgement is due to all the dataset creators who generously share their datasets openly. + +Among the contributors of datasets: +- GPTeacher was made available by Teknium +- Wizard LM by nlpxucan +- Nous Research Instruct Dataset was provided by Karan4D and HueminArt. +- GPT4-LLM and Unnatural Instructions were provided by Microsoft +- Airoboros dataset by jondurbin +- Camel-AI's domain expert datasets are from Camel-AI +- CodeAlpaca dataset by Sahil 2801. + +If anyone was left out, please open a thread in the community tab. 
+ +## Prompt Format + +The model follows the Alpaca prompt format: +``` +### Instruction: +<prompt> + +### Response: +<leave a newline blank for model to respond> + +``` + +or + +``` +### Instruction: +<instruction> + +### Input: +<input> + +### Response: +<leave a newline blank for model to respond> + +``` + +## Benchmark Results +AGI-Eval +``` +| Task |Version| Metric |Value | |Stderr| +|agieval_aqua_rat | 0|acc |0.2362|± |0.0267| +| | |acc_norm|0.2480|± |0.0272| +|agieval_logiqa_en | 0|acc |0.3425|± |0.0186| +| | |acc_norm|0.3472|± |0.0187| +|agieval_lsat_ar | 0|acc |0.2522|± |0.0287| +| | |acc_norm|0.2087|± |0.0269| +|agieval_lsat_lr | 0|acc |0.3510|± |0.0212| +| | |acc_norm|0.3627|± |0.0213| +|agieval_lsat_rc | 0|acc |0.4647|± |0.0305| +| | |acc_norm|0.4424|± |0.0303| +|agieval_sat_en | 0|acc |0.6602|± |0.0331| +| | |acc_norm|0.6165|± |0.0340| +|agieval_sat_en_without_passage| 0|acc |0.4320|± |0.0346| +| | |acc_norm|0.4272|± |0.0345| +|agieval_sat_math | 0|acc |0.2909|± |0.0307| +| | |acc_norm|0.2727|± |0.0301| +``` +GPT-4All Benchmark Set +``` +| Task |Version| Metric |Value | |Stderr| +|arc_challenge| 0|acc |0.5102|± |0.0146| +| | |acc_norm|0.5213|± |0.0146| +|arc_easy | 0|acc |0.7959|± |0.0083| +| | |acc_norm|0.7567|± |0.0088| +|boolq | 1|acc |0.8394|± |0.0064| +|hellaswag | 0|acc |0.6164|± |0.0049| +| | |acc_norm|0.8009|± |0.0040| +|openbookqa | 0|acc |0.3580|± |0.0215| +| | |acc_norm|0.4620|± |0.0223| +|piqa | 0|acc |0.7992|± |0.0093| +| | |acc_norm|0.8069|± |0.0092| +|winogrande | 0|acc |0.7127|± |0.0127| +``` +BigBench Reasoning Test +``` +| Task |Version| Metric |Value | |Stderr| + +|bigbench_causal_judgement | 0|multiple_choice_grade|0.5526|± |0.0362| +|bigbench_date_understanding | 0|multiple_choice_grade|0.7344|± |0.0230| +|bigbench_disambiguation_qa | 0|multiple_choice_grade|0.2636|± |0.0275| +|bigbench_geometric_shapes | 0|multiple_choice_grade|0.0195|± |0.0073| +| | |exact_str_match |0.0000|± |0.0000| +|bigbench_logical_deduction_five_objects | 0|multiple_choice_grade|0.2760|± |0.0200| +|bigbench_logical_deduction_seven_objects | 
0|multiple_choice_grade|0.2100|± |0.0154| +|bigbench_logical_deduction_three_objects | 0|multiple_choice_grade|0.4400|± |0.0287| +|bigbench_movie_recommendation | 0|multiple_choice_grade|0.2440|± |0.0192| +|bigbench_navigate | 0|multiple_choice_grade|0.4950|± |0.0158| +|bigbench_reasoning_about_colored_objects | 0|multiple_choice_grade|0.5570|± |0.0111| +|bigbench_ruin_names | 0|multiple_choice_grade|0.3728|± |0.0229| +|bigbench_salient_translation_error_detection | 0|multiple_choice_grade|0.1854|± |0.0123| +|bigbench_snarks | 0|multiple_choice_grade|0.6298|± |0.0360| +|bigbench_sports_understanding | 0|multiple_choice_grade|0.6156|± |0.0155| +|bigbench_temporal_sequences | 0|multiple_choice_grade|0.3140|± |0.0147| +|bigbench_tracking_shuffled_objects_five_objects | 0|multiple_choice_grade|0.2032|± |0.0114| +|bigbench_tracking_shuffled_objects_seven_objects| 0|multiple_choice_grade|0.1406|± |0.0083| +|bigbench_tracking_shuffled_objects_three_objects| 0|multiple_choice_grade|0.4400|± |0.0287| +``` + +These are the highest benchmarks Hermes has seen on every metric, achieving the following average scores: +- GPT4All benchmark average is now 70.0 - from 68.8 in Hermes-Llama1 +- 0.3657 on BigBench, up from 0.328 on hermes-llama1 +- 0.372 on AGIEval, up from 0.354 on Hermes-llama1 + +These benchmarks currently have us at #1 on ARC-c, ARC-e, Hellaswag, and OpenBookQA, and 2nd place on Winogrande, comparing to GPT4all's benchmarking list, supplanting Hermes 1 for the new top position. 
+ +## Resources for Applied Use Cases: +Check out LM Studio for a nice ChatGPT-style interface here: https://lmstudio.ai/ +For an example of a back-and-forth chatbot using Hugging Face Transformers and Discord, check out: https://github.com/teknium1/alpaca-discord +For an example of a roleplaying Discord chatbot, check out this: https://github.com/teknium1/alpaca-roleplay-discordbot + +## Future Plans +We plan to continue iterating on both higher-quality data and new data-filtering techniques to eliminate lower-quality data going forward. + +## Model Usage +The model is available for download on Hugging Face. It is suitable for a wide range of language tasks, from generating creative text to understanding and following complex instructions. + +[Built with Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) + +# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) +Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_uukuguy__speechless-hermes-coig-lite-13b) + +| Metric | Value | +|-----------------------|---------------------------| +| Avg. 
| 53.31 | +| ARC (25-shot) | 59.47 | +| HellaSwag (10-shot) | 82.28 | +| MMLU (5-shot) | 55.18 | +| TruthfulQA (0-shot) | 47.6 | +| Winogrande (5-shot) | 78.61 | +| GSM8K (5-shot) | 10.77 | +| DROP (3-shot) | 39.25 | diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..9c16aa4 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,3 @@ +{ + "<pad>": 32000 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000..fa4cd98 --- /dev/null +++ b/config.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "/opt/local/llm_models/huggingface.co//NousResearch/Nous-Hermes-Llama2-13b", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 13824, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 40, + "num_hidden_layers": 40, + "num_key_value_heads": 40, + "pad_token_id": 0, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.32.0.dev0", + "use_cache": true, + "vocab_size": 32001 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..d71519f --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 0, + "temperature": 0.9, + "top_p": 0.6, + "transformers_version": "4.32.0.dev0" +} diff --git a/model-00001-of-00003.safetensors b/model-00001-of-00003.safetensors new file mode 100644 index 0000000..917fca3 --- /dev/null +++ b/model-00001-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a6eca9271e7991e722bff3d0e0d0617224a1747d08a6af3113b19eac1731611 +size 9948701776 diff --git a/model-00002-of-00003.safetensors b/model-00002-of-00003.safetensors new file mode 100644 index 0000000..e524d59 --- 
/dev/null +++ b/model-00002-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa67e98da7fdccff4d61e9622cc4b433691df35a8c292c2220fdb6c00921cedf +size 9904127520 diff --git a/model-00003-of-00003.safetensors b/model-00003-of-00003.safetensors new file mode 100644 index 0000000..211eecc --- /dev/null +++ b/model-00003-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:978238eba3c6743d5f74c0853943dfa9fb76d41f9e8c6d61da88c2ccc5c94652 +size 6178971472 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..edbe6cf --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,410 @@ +{ + "metadata": { + "total_size": 26031759360 + }, + "weight_map": { + "lm_head.weight": "model-00003-of-00003.safetensors", + "model.embed_tokens.weight": "model-00001-of-00003.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.up_proj.weight": 
"model-00001-of-00003.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + 
"model.layers.11.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.mlp.gate_proj.weight": 
"model-00001-of-00003.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + 
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.down_proj.weight": 
"model-00002-of-00003.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + 
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.input_layernorm.weight": 
"model-00002-of-00003.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + 
"model.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.v_proj.weight": 
"model-00002-of-00003.safetensors", + "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + 
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.rotary_emb.inv_freq": 
"model-00003-of-00003.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + 
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.q_proj.weight": 
"model-00003-of-00003.safetensors", + "model.layers.36.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + 
"model.layers.39.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.o_proj.weight": 
"model-00001-of-00003.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors", + 
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.norm.weight": "model-00003-of-00003.safetensors" + } +} \ No newline at end of file diff --git a/pytorch_model-00001-of-00003.bin b/pytorch_model-00001-of-00003.bin new file mode 100644 index 0000000..8edd378 --- /dev/null +++ b/pytorch_model-00001-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80630cd9850f8e0cdf0c77236e07732de74cb4d721f31435d40b864dde7250fc +size 9948738670 diff --git a/pytorch_model-00002-of-00003.bin 
b/pytorch_model-00002-of-00003.bin new file mode 100644 index 0000000..65022c6 --- /dev/null +++ b/pytorch_model-00002-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1068a14238ccad3a414c4436a023bd262e9e523b96dac71d02594ded7ade5fc +size 9904165024 diff --git a/pytorch_model-00003-of-00003.bin b/pytorch_model-00003-of-00003.bin new file mode 100644 index 0000000..1b75d7e --- /dev/null +++ b/pytorch_model-00003-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:991c8be4755a527334779dc44942098c8c9200eaf2e61232d3ef02858a0815ee +size 6178993865 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000..134ffac --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,410 @@ +{ + "metadata": { + "total_size": 26031759360 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00003-of-00003.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.gate_proj.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + 
"model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.mlp.down_proj.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + 
"model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.input_layernorm.weight": 
"pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + 
"model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.v_proj.weight": 
"pytorch_model-00002-of-00003.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + 
"model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + 
"model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.gate_proj.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + 
"model.layers.31.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.mlp.down_proj.weight": 
"pytorch_model-00003-of-00003.bin", + "model.layers.34.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + 
"model.layers.36.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.37.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.input_layernorm.weight": 
"pytorch_model-00003-of-00003.bin", + "model.layers.39.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.39.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + 
"model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.v_proj.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.norm.weight": "pytorch_model-00003-of-00003.bin" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..f928b24 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + 
"eos_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..93baf4e --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,34 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 4096, + "pad_token": null, + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizer", + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +}