commit 10aea2a8f4f81758e73a99c00524c5351d8291b9 Author: ModelHub XC Date: Tue Jun 16 07:31:16 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: prawinin/Llama-3.2-3B-Uncensored Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..307bf7e --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +--- +base_model: meta-llama/Llama-3.2-3B +language: +- en +license: llama3.2 +tags: +- uncensored +- gguf +- llama-cpp +- sft +pipeline_tag: text-generation +--- + +# Llama-3.2-3B-Uncensored + +This repository contains the raw **Safetensors** weights for an uncensored variant of Llama-3.2-3B. This model is optimized for edge deployment and fast inference while completely bypassing standard RLHF refusal mechanisms. + +## Why Uncensored? +Consumer AI models are heavily filtered, which often blocks legitimate academic research, complex creative writing, and sovereign data analysis. By utilizing orthogonalization and abliteration techniques, the "refusal" vectors in this model have been erased. + +We kept this model entirely open and uncensored so that researchers, legal tech developers, and sovereign AI builders have a blank-slate reasoning engine that obeys the user, not a cloud provider's safety policy. + +## Format Note: Safetensors vs GGUF +This specific repository hosts the multi-part `.safetensors` files (as seen in the Files tab). +* If you are looking for the **Ollama-ready GGUF version**, please navigate to the `prawinin/Llama-3.2-3B-Uncensored-Q8_0-GGUF` repository. +* The weights in *this* repository are meant for developers building custom pipelines or doing further fine-tuning. + +## How to Use (Python / Transformers) + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer +import torch + +model_id = "prawinin/Llama-3.2-3B-Uncensored" + +tokenizer = AutoTokenizer.from_pretrained(model_id) +model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.bfloat16, + device_map="auto" +) + +prompt = "Explain the physiological effects of severe sleep deprivation on the human brain." +messages = [ + {"role": "user", "content": prompt} +] + +text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) +inputs = tokenizer([text], return_tensors="pt").to("cuda") + +outputs = model.generate(**inputs, max_new_tokens=512) +print(tokenizer.decode(outputs[0], skip_special_tokens=True)) + diff --git a/abliteration_metadata.json b/abliteration_metadata.json new file mode 100644 index 0000000..9391e74 --- /dev/null +++ b/abliteration_metadata.json @@ -0,0 +1,58 @@ +{ + "source_model": "meta-llama/Llama-3.2-3B-Instruct", + "technique": "refusal_direction_ablation", + "method": "advanced", + "method_config": { + "n_directions": 4, + "direction_method": "svd", + "norm_preserve": true, + "regularization": 0.3, + "refinement_passes": 2, + "project_biases": true, + "use_chat_template": true, + "use_whitened_svd": false, + "true_iterative_refinement": false, + "winsorize_activations": false, + "float_layer_interpolation": false, + "cot_aware": false, + "use_kl_optimization": false, + "use_lora_ablation": false, + "spectral_cascade": false, + "spectral_bands": 3, + "spectral_threshold": 0.05 + }, + "references": [ + "Arditi et al., Refusal in Language Models Is Mediated by a Single Direction (NeurIPS 2024)", + "Gabliteration: SVD-based multi-direction extraction (arXiv:2512.18901)", + "Norm-Preserving Biprojected Abliteration (grimjim, 2025)", + "Young, Comparative Analysis of LLM Abliteration Methods (arXiv:2512.13655)", + "Joad et al., More to Refusal than a Single Direction (2026)", + "Heretic (p-e-w, 2025): Bayesian optimization, LoRA-mediated ablation, winsorization", + "OBLITERATUS: Whitened SVD, EGA, CoT-aware, KL co-optimization, float interpolation (novel)" + ], + "strong_layers": [ + 27, + 26, + 25, + 24, + 23, + 22, + 21, + 20, + 19 + ], + "n_harmful_prompts": 512, + "n_harmless_prompts": 512, + "quality_metrics": { + "perplexity": 4.730742974758066, + "coherence": 1.0, + "refusal_rate": 0.7333333333333333, + "degenerate_count": 1, + "kl_divergence": 1.8412551879882812, + "spectral_certification": "RED" + }, + "kl_contributions": {}, + "cot_preserved_layers": [], + "float_layer_weights": {}, + "lora_adapters_saved": false +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..1bad6a0 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000..248c9e2 --- /dev/null +++ b/config.json @@ -0,0 +1,40 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "dtype": "float16", + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 24, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "pad_token_id": null, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_parameters": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_theta": 500000.0, + "rope_type": "llama3" + }, + "tie_word_embeddings": true, + "transformers_version": "5.0.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..7c9c222 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "5.0.0" +} diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..5d351e2 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03accdbb09b98fa5818ebd40d35f7a588116eec195512d99250ad2342a9cdb2 +size 1996044440 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..f5f6935 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3896b14bf2b3c2a16553010192964c635413b39280cc08abddb8a9e4ebe75f +size 1963054808 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..1f5865e --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9880735305709c3e4d681eaa049ca831bfa2934f79bc9c28f24b5f50a26e7a98 +size 1963067368 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..98fd2a6 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:115fb27fabd9dcb6cb7220f9a261aa5043d77edef66205b58ae27e54ae86e3b9 +size 503362088 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..6da9715 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,262 @@ +{ + "metadata": { + "total_parameters": 3212749824, + "total_size": 6425499648 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..52a53f9 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3b33fca4ec2515ad88a7ea2847493eba6913f3461acb1ebfba2857dc24aafd +size 17210182 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..b0c7368 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,14 @@ +{ + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "is_local": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "tokenizer_class": "TokenizersBackend" +}