From 00664e68cff007e584da669e86ebb1d5072f0ec2 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sat, 13 Jun 2026 18:30:12 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: KoboldAI/LLaMA2-13B-Holomax Source: Original Platform --- .gitattributes | 47 ++++ README.md | 189 ++++++++++++++ config.json | 26 ++ configuration.json | 1 + generation_config.json | 10 + model-00001-of-00003.safetensors | 3 + model-00002-of-00003.safetensors | 3 + model-00003-of-00003.safetensors | 3 + model.safetensors.index.json | 410 +++++++++++++++++++++++++++++++ pytorch_model-00001-of-00003.bin | 3 + pytorch_model-00002-of-00003.bin | 3 + pytorch_model-00003-of-00003.bin | 3 + pytorch_model.bin.index.json | 3 + special_tokens_map.json | 24 ++ tokenizer.model | 3 + tokenizer_config.json | 35 +++ 16 files changed, 766 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 configuration.json create mode 100644 generation_config.json create mode 100644 model-00001-of-00003.safetensors create mode 100644 model-00002-of-00003.safetensors create mode 100644 model-00003-of-00003.safetensors create mode 100644 model.safetensors.index.json create mode 100644 pytorch_model-00001-of-00003.bin create mode 100644 pytorch_model-00002-of-00003.bin create mode 100644 pytorch_model-00003-of-00003.bin create mode 100644 pytorch_model.bin.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..53d7257 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,47 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin 
filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* 
filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2cb8f22 --- /dev/null +++ b/README.md @@ -0,0 +1,189 @@ +--- +license: other +--- +# LLaMA 2 Holomax 13B - The writers version of Mythomax + +This is an expansion merge to the well praised Mythomax model from Gryphe (60%) using MrSeeker's KoboldAI Holodeck model (40%) +The goal of this model is to enhance story writing capabilities while preserving the desirable traits of the Mythomax model as much as possible (It does limit chat reply length). + +Testers found that this model passes the InteracTV benchmark, was useful for story writing, chatting and text adventures using Instruction mode. +Preservation of factual knowledge has not been tested since we expect the original to be better in those use cases as this merge was focussed on fiction. + +## Credits +This merge is not possible without the following models and model authors (Thanks to all of you for your work!) + +Mythomax by Gryphe: +- Mythologic-L2 by Gryphe: +- - Hermes by Nous-Research + - Chronos V2 by Elinas + - Airoboros m2.0 by Jondurbin +- Huginn by Face of Goonery: +- - Hermes by Nous-Research + - StableBeluga by StabilityAI + - Airoboros by Jondurbin + - Chronos by Elinas + - Limarp by Lemonila + +Holodeck by Mr.Seeker + +## Guidelines +This model is designed to be flexible, it should be able to be used as a co-writing model, as well as a variety of instruct formats (Tested with Alpaca) and regular chatting both augmented with traditional formatting and instruct formatting. +The Alpaca format is as follows: +``` +### Instruction: + +Instruction goes here + +### Response: +``` +But if you have a different preferred format that works on one of the models above it will likely still work. 
+ +## License +After publishing the model we were informed that one of the origin models upstream was uploaded under the AGPLv3, it is currently unknown what effects this has on this model because all weights have been modified and none of the original weights are intact. +At the moment of publishing (and writing this message) both merged models Holodeck and Mythomax were licensed Llama2, therefore the Llama2 license applies to this model. +However, Holodeck contains a non-commercial clause and may only be used for research or private use, while Limarp is licensed AGPLv3. +AGPLv3 conflicts with the commercial usage restrictions of the Llama2 license, therefore we assume this aspect does not apply and the authors intended for commercial usage restrictions to be permitted. +As a result we have decided to leave the model available for public download on the assumption that all involved authors intend for it to be licensed with commercial restrictions / llama2 restrictions in place, but with the further rights and freedoms the AGPLv3 grants a user. + +If HF informs us that this assumption is incorrect and requests us to take this model down, we will republish the model in the form of the original merging script that was used to create the end result. 
+To comply with the AGPLv3 aspect the "source" of this model is as follows (Because this model is made on a binary level, we can only provide the script that created the model): +``` +import json +import os +import shutil +import subprocess +from tkinter.filedialog import askdirectory, askopenfilename + +import torch +from colorama import Fore, Style, init +from transformers import (AutoModel, AutoModelForCausalLM, AutoTokenizer, + LlamaConfig, LlamaForCausalLM, LlamaTokenizer, + PreTrainedTokenizer, PreTrainedTokenizerFast) + +newline = '\n' +def clear_console(): + if os.name == "nt": # For Windows + subprocess.call("cls", shell=True) + else: # For Linux and macOS + subprocess.call("clear", shell=True) + +clear_console() +print(f"{Fore.YELLOW}Starting script, please wait...{Style.RESET_ALL}") + +#mixer output settings +blend_ratio = 0.4 #setting to 0 gives first model, and 1 gives second model +fp16 = False #perform operations in fp16. Saves memory, but CPU inference will not be possible. +always_output_fp16 = True #if true, will output fp16 even if operating in fp32 +max_shard_size = "10000MiB" #set output shard size +force_cpu = True #only use cpu +load_sharded = True #load both models shard by shard + +print(f"Blend Ratio set to: {Fore.GREEN}{blend_ratio}{Style.RESET_ALL}") +print(f"Operations in fp16 is: {Fore.GREEN}{fp16}{Style.RESET_ALL}") +print(f"Save Result in fp16: {Fore.GREEN}{always_output_fp16}{Style.RESET_ALL}") +print(f"CPU RAM Only: {Fore.GREEN}{force_cpu}{Style.RESET_ALL}{newline}") + +#test generation settings, only for fp32 +deterministic_test = True #determines if outputs are always the same +test_prompt = "" #test prompt for generation. only for fp32. set to empty string to skip generating. 
+test_max_length = 32 #test generation length + + +blend_ratio_b = 1.0 - blend_ratio + +def get_model_info(model): + with torch.no_grad(): + outfo = "" + cntent = 0 + outfo += "\n==============================\n" + for name, para in model.named_parameters(): + cntent += 1 + outfo += ('{}: {}'.format(name, para.shape))+"\n" + outfo += ("Num Entries: " + str(cntent))+"\n" + outfo += ("==============================\n") + return outfo + +def merge_models(model1,model2): + with torch.no_grad(): + tensornum = 0 + for p1, p2 in zip(model1.parameters(), model2.parameters()): + p1 *= blend_ratio + p2 *= blend_ratio_b + p1 += p2 + tensornum += 1 + print("Merging tensor "+str(tensornum)) + pass + +def read_index_filenames(sourcedir): + index = json.load(open(sourcedir + '/pytorch_model.bin.index.json','rt')) + fl = [] + for k,v in index['weight_map'].items(): + if v not in fl: + fl.append(v) + return fl + +print("Opening file dialog, please select FIRST model directory...") +model_path1 = "Gryphe/MythoMax-L2-13b" +print(f"First Model is: {model_path1}") +print("Opening file dialog, please select SECOND model directory...") +model_path2 = "KoboldAI/LLAMA2-13B-Holodeck-1" +print(f"Second Model is: {model_path2}") +print("Opening file dialog, please select OUTPUT model directory...") +model_path3 = askdirectory(title="Select Output Directory of merged model") +print(f"Merged Save Directory is: {model_path3}{newline}") +if not model_path1 or not model_path2: + print("\nYou must select two directories containing models to merge and one output directory. 
Exiting.") + exit() + +with torch.no_grad(): + if fp16: + torch.set_default_dtype(torch.float16) + else: + torch.set_default_dtype(torch.float32) + + device = torch.device("cuda") if (torch.cuda.is_available() and not force_cpu) else torch.device("cpu") + print(device) + + print("Loading Model 1...") + model1 = AutoModelForCausalLM.from_pretrained(model_path1) #,torch_dtype=torch.float16 + model1 = model1.to(device) + model1.eval() + print("Model 1 Loaded. Dtype: " + str(model1.dtype)) + print("Loading Model 2...") + model2 = AutoModelForCausalLM.from_pretrained(model_path2) #,torch_dtype=torch.float16 + model2 = model2.to(device) + model2.eval() + print("Model 2 Loaded. Dtype: " + str(model2.dtype)) + +# Saving for posterity reasons, handy for troubleshooting if model result is broken +# #ensure both models have the exact same layout +# m1_info = get_model_info(model1) +# m2_info = get_model_info(model2) +# if m1_info != m2_info: +# print("Model 1 Info: " + m1_info) +# print("Model 2 Info: " + m2_info) +# print("\nERROR:\nThe two selected models are not compatible! They must have identical structure!") +# exit() + + print("Merging models...") + merge_models(model1,model2) + + if model_path3: + print("Saving new model...") + if always_output_fp16 and not fp16: + model1.half() + model1.save_pretrained(model_path3, max_shard_size=max_shard_size) + print("\nSaved to: " + model_path3) + print("\nCopying files to: " + model_path3) + files_to_copy = ["tokenizer.model", "special_tokens_map.json", "tokenizer_config.json", "vocab.json", "merges.txt"] + for filename in files_to_copy: + src_path = os.path.join(model_path1, filename) + dst_path = os.path.join(model_path3, filename) + try: + shutil.copy2(src_path, dst_path) + except FileNotFoundError: + print("\nFile " + filename + " not found in" + model_path1 + ". 
Skipping.") + else: + print("\nOutput model was not saved as no output path was selected.") + print("\nScript Completed.") +``` \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..3246ee3 --- /dev/null +++ b/config.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "/home/mixer/koboldai/models/MythoMax-L2-13b", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 13824, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 40, + "num_hidden_layers": 40, + "num_key_value_heads": 40, + "pad_token_id": 0, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.31.0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..1e68158 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,10 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "max_length": 4096, + "pad_token_id": 0, + "temperature": 0.9, + "top_p": 0.6, + "transformers_version": "4.31.0" +} diff --git a/model-00001-of-00003.safetensors b/model-00001-of-00003.safetensors new file mode 100644 index 0000000..3abce43 --- /dev/null +++ b/model-00001-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f3241243af0769f16ace34d42a33b742ba323a1fd507a85756f39c75e5fdd0 +size 9948693272 diff --git a/model-00002-of-00003.safetensors b/model-00002-of-00003.safetensors new file mode 100644 index 
0000000..efa1cf9 --- /dev/null +++ b/model-00002-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:555c0ad0ef5e17b3451420c65ce1f6ebe37f99580a173bf16576d64aee67f62b +size 9904129368 diff --git a/model-00003-of-00003.safetensors b/model-00003-of-00003.safetensors new file mode 100644 index 0000000..56d59fb --- /dev/null +++ b/model-00003-of-00003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7e40788c74493f6125b3194699a4236d553008c7e6d200137f061861a9ce67c +size 6178962272 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..852a29d --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,410 @@ +{ + "metadata": { + "total_size": 26031738880 + }, + "weight_map": { + "lm_head.weight": "model-00003-of-00003.safetensors", + "model.embed_tokens.weight": "model-00001-of-00003.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + 
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.q_proj.weight": 
"model-00001-of-00003.safetensors", + "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + 
"model.layers.14.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.o_proj.weight": 
"model-00002-of-00003.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors", + 
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.k_proj.weight": 
"model-00002-of-00003.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + 
"model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.post_attention_layernorm.weight": 
"model-00002-of-00003.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + 
"model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.up_proj.weight": 
"model-00001-of-00003.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + 
"model.layers.31.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.mlp.gate_proj.weight": 
"model-00003-of-00003.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + 
"model.layers.36.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.39.mlp.down_proj.weight": 
"model-00003-of-00003.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + 
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.input_layernorm.weight": 
"model-00001-of-00003.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.norm.weight": "model-00003-of-00003.safetensors" + } +} diff --git a/pytorch_model-00001-of-00003.bin b/pytorch_model-00001-of-00003.bin new file mode 100644 index 0000000..145c8b5 --- /dev/null +++ b/pytorch_model-00001-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c083c199392bbaed597be318b63d2966f2877a3e9711052f69aba26733332606 +size 10478280794 diff --git 
a/pytorch_model-00002-of-00003.bin b/pytorch_model-00002-of-00003.bin new file mode 100644 index 0000000..f2d0873 --- /dev/null +++ b/pytorch_model-00002-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d234b4a74b5979a42d67f1859a20f83c2ba33aedcc8dacbed0093b6841edce +size 10360317630 diff --git a/pytorch_model-00003-of-00003.bin b/pytorch_model-00003-of-00003.bin new file mode 100644 index 0000000..6a2030d --- /dev/null +++ b/pytorch_model-00003-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dfb0e40fe7681546a690fafbd384fd6cd924262478cdd72d04e63dd3fa543e0 +size 5193273553 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000..6533e2a --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bce53d7ad4ccfb7659877cdcec5392ddca85035f1675db4db344ad0c5f1b741 +size 33444 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..14761dc --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "<s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "</s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "<unk>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..508754b --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,35 @@ +{ + "add_bos_token": true, +
"add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "<s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "</s>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 4096, + "pad_token": null, + "padding_side": "right", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizer", + "unk_token": { + "__type": "AddedToken", + "content": "<unk>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +}