From 4ff1a66e2a2dec9cf169c30fc2209b9f3dcc1b4f Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sun, 17 May 2026 09:03:47 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: fblgit/UNA-SOLAR-10.7B-Instruct-v1.0 Source: Original Platform --- .gitattributes | 37 +++ README.md | 282 ++++++++++++++++++++ config.json | 28 ++ generation_config.json | 8 + ggml-model-q5_k_m.gguf | 3 + ggml-model-q6_k.gguf | 3 + model-00001-of-00005.safetensors | 3 + model-00002-of-00005.safetensors | 3 + model-00003-of-00005.safetensors | 3 + model-00004-of-00005.safetensors | 3 + model-00005-of-00005.safetensors | 3 + model.safetensors.index.json | 442 +++++++++++++++++++++++++++++++ special_tokens_map.json | 30 +++ tokenizer.model | 3 + tokenizer_config.json | 43 +++ 15 files changed, 894 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 ggml-model-q5_k_m.gguf create mode 100644 ggml-model-q6_k.gguf create mode 100644 model-00001-of-00005.safetensors create mode 100644 model-00002-of-00005.safetensors create mode 100644 model-00003-of-00005.safetensors create mode 100644 model-00004-of-00005.safetensors create mode 100644 model-00005-of-00005.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e3e007d --- /dev/null +++ b/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt 
filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +ggml-model-q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text +ggml-model-q6_k.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..ee58b60 --- /dev/null +++ b/README.md @@ -0,0 +1,282 @@ +--- +base_model: upstage/SOLAR-10.7B-Instruct-v1.0 +tags: +- alignment-handbook +- generated_from_trainer +- UNA +- single-turn +model-index: +- name: UNA-SOLAR-10.7B-Instruct-v1.0 + results: [] +license: cc-by-nc-nd-4.0 +language: +- en +library_name: transformers +--- + +# UNA: Uniform Neural Alignment + +SFT Further: +- Linear +- 2e-5 + +Merges: +- Fan in: `0:2` +- Fan out: 
`-4:` +- Intermediary layers: `1/1/1/0/1/1/0/1/0/1/1/0/1/1/0` (the On/Off pattern is used as a form of regularisation). +## Quants + +* [ggml-model-q5_k_m.gguf](https://huggingface.co/fblgit/UNA-SOLAR-10.7B-Instruct-v1.0/resolve/main/ggml-model-q5_k_m.gguf?download=true) +* [ggml-model-q6_k.gguf](https://huggingface.co/fblgit/UNA-SOLAR-10.7B-Instruct-v1.0/resolve/main/ggml-model-q6_k.gguf?download=true) + +## Libraries: + +- Transformers 4.35.0-UNA +- Pytorch 2.1.0 +- Datasets 2.14.6 +- Tokenizers 0.14.1 + +## Evals LM-Evaluation Harness +`mt-bench`: +``` +Mode: single +Input file: data/mt_bench/model_judgment/gpt-4_single.jsonl + +########## First turn ########## + score +model turn +gpt-4 1 8.95625 +claude-v1 1 8.15000 +gpt-3.5-turbo 1 8.07500 +LUNA-SOLARkrautLM-Instruct 1 7.93750 +UNA-SOLAR-10.7B-Instruct-v1.0 1 7.80625 +vicuna-33b-v1.3 1 7.45625 +wizardlm-30b 1 7.13125 +tulu-30b 1 7.01875 +vicuna-13b-v1.3 1 6.81250 +guanaco-65b 1 6.78125 +nous-hermes-13b 1 6.43125 +alpaca-13b 1 4.97500 +rwkv-4-raven-14b 1 4.74375 +llama-13b 1 3.26250 + +########## Second turn ########## + score +model turn +gpt-4 2 9.025000 +gpt-3.5-turbo 2 7.812500 +claude-v1 2 7.650000 +UNA-SOLAR-10.7B-Instruct-v1.0 2 7.237500 +LUNA-SOLARkrautLM-Instruct 2 6.987500 +wizardlm-30b 2 6.887500 +vicuna-33b-v1.3 2 6.787500 +guanaco-65b 2 6.037500 +vicuna-13b-v1.3 2 5.962500 +tulu-30b 2 5.850000 +nous-hermes-13b 2 4.664557 +alpaca-13b 2 4.087500 +rwkv-4-raven-14b 2 3.225000 +llama-13b 2 1.950000 + +########## Average ########## + score +model +gpt-4 8.990625 +gpt-3.5-turbo 7.943750 +claude-instant-v1 7.905660 +claude-v1 7.900000 +UNA-SOLAR-10.7B-Instruct-v1.0 7.521875 +LUNA-SOLARkrautLM-Instruct 7.462500 +vicuna-33b-v1.3 7.121875 +wizardlm-30b 7.009375 +Llama-2-70b-chat 6.856250 +Llama-2-13b-chat 6.650000 +guanaco-33b 6.528125 +tulu-30b 6.434375 +guanaco-65b 6.409375 +oasst-sft-7-llama-30b 6.409375 +palm-2-chat-bison-001 6.400000 +mpt-30b-chat 6.393750 +vicuna-13b-v1.3 6.387500 +wizardlm-13b 6.353125 +Llama-2-7b-chat 
6.268750 +vicuna-7b-v1.3 5.996875 +baize-v2-13b 5.750000 +nous-hermes-13b 5.553459 +mpt-7b-chat 5.459119 +gpt4all-13b-snoozy 5.452830 +koala-13b 5.350000 +mpt-30b-instruct 5.218750 +falcon-40b-instruct 5.168750 +h2ogpt-oasst-open-llama-13b 4.625000 +alpaca-13b 4.531250 +chatglm-6b 4.500000 +oasst-sft-4-pythia-12b 4.318750 +rwkv-4-raven-14b 3.984375 +dolly-v2-12b 3.275000 +fastchat-t5-3b 3.040625 +stablelm-tuned-alpha-7b 2.753125 +llama-13b 2.606250 +``` + +`big-refactor` branch: + +``` +hf (pretrained=fblgit/UNA-SOLAR-10.7B-Instruct-v1.0), gen_kwargs: (None), limit: None, num_fewshot: 25, batch_size: auto (32) +| Tasks |Version|Filter|n-shot| Metric |Value | |Stderr| +|-------------|-------|------|-----:|--------|-----:|---|-----:| +|arc_challenge|Yaml |none | 25|acc |0.6954|± |0.0134| +| | |none | 25|acc_norm|0.7167|± |0.0132| + +hf (pretrained=fblgit/UNA-SOLAR-10.7B-Instruct-v1.0), gen_kwargs: (None), limit: None, num_fewshot: 5, batch_size: auto +|Tasks|Version| Filter |n-shot| Metric |Value| |Stderr| +|-----|-------|----------|-----:|-----------|----:|---|-----:| +|gsm8k|Yaml |get-answer| 5|exact_match|0.671|± |0.0129| + +hf (pretrained=fblgit/UNA-SOLAR-10.7B-Instruct-v1.0), gen_kwargs: (), limit: None, num_fewshot: 0, batch_size: auto (64) +| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr| +|--------------|-------|------|-----:|------|-----:|---|-----:| +|truthfulqa_mc2|Yaml |none | 0|acc |0.7297|_ |0.0149| + +hf (pretrained=fblgit/UNA-SOLAR-10.7B-Instruct-v1.0), gen_kwargs: (None), limit: None, num_fewshot: 10, batch_size: auto (32) +| Tasks |Version|Filter|n-shot| Metric |Value | |Stderr| +|---------|-------|------|-----:|--------|-----:|---|-----:| +|hellaswag|Yaml |none | 10|acc |0.7091|± |0.0045| +| | |none | 10|acc_norm|0.8821|± |0.0032| + +hf (pretrained=fblgit/UNA-SOLAR-10.7B-Instruct-v1.0,dtype=float16), gen_kwargs: (), limit: None, num_fewshot: 0, batch_size: auto (32) +| Tasks |Version|Filter|n-shot| Metric |Value | |Stderr| 
+|--------------|-------|------|-----:|----------|-----:|---|-----:| +|boolq |Yaml |none | 0|acc |0.8807|_ |0.0057| +|lambada_openai|Yaml |none | 0|perplexity|3.2452|_ |0.0778| +| | |none | 0|acc |0.7207|_ |0.0063| +|piqa |Yaml |none | 0|acc |0.8020|_ |0.0093| +| | |none | 0|acc_norm |0.8009|_ |0.0093| +|sciq |Yaml |none | 0|acc |0.9730|_ |0.0051| +| | |none | 0|acc_norm |0.9630|_ |0.0060| +|winogrande |Yaml |none | 0|acc |0.7577|_ |0.0120| + +hf (pretrained=fblgit/UNA-SOLAR-10.7B-Instruct-v1.0,dtype=float16), gen_kwargs: (), limit: None, num_fewshot: 0, batch_size: auto (64) +| Tasks |Version|Filter|n-shot| Metric |Value | |Stderr| +|--------|-------|------|-----:|--------|-----:|---|-----:| +|mathqa |Yaml |none | 0|acc |0.3474|_ |0.0087| +| | |none | 0|acc_norm|0.3568|_ |0.0088| +|pubmedqa|Yaml |none | 0|acc |0.5400|_ |0.0223| + +hf (pretrained=fblgit/UNA-SOLAR-10.7B-Instruct-v1.0,dtype=float16), gen_kwargs: (), limit: None, num_fewshot: 0, batch_size: auto +| Tasks |Version|Filter|n-shot| Metric |Value | |Stderr| +|------------------------------------------------------|-------|------|-----:|-----------|-----:|---|-----:| +|bbh_fewshot |N/A |none | 0|exact_match|0.4660|_ |0.1771| +| - bbh_fewshot_boolean_expressions |Yaml |none | 0|exact_match|0.8160|_ |0.0246| +| - bbh_fewshot_causal_judgement |Yaml |none | 0|exact_match|0.4973|_ |0.0367| +| - bbh_fewshot_date_understanding |Yaml |none | 0|exact_match|0.4840|_ |0.0317| +| - bbh_fewshot_disambiguation_qa |Yaml |none | 0|exact_match|0.6520|_ |0.0302| +| - bbh_fewshot_dyck_languages |Yaml |none | 0|exact_match|0.2040|_ |0.0255| +| - bbh_fewshot_formal_fallacies |Yaml |none | 0|exact_match|0.5280|_ |0.0316| +| - bbh_fewshot_geometric_shapes |Yaml |none | 0|exact_match|0.3360|_ |0.0299| +| - bbh_fewshot_hyperbaton |Yaml |none | 0|exact_match|0.5520|_ |0.0315| +| - bbh_fewshot_logical_deduction_five_objects |Yaml |none | 0|exact_match|0.4520|_ |0.0315| +| - bbh_fewshot_logical_deduction_seven_objects |Yaml |none | 
0|exact_match|0.3920|_ |0.0309| +| - bbh_fewshot_logical_deduction_three_objects |Yaml |none | 0|exact_match|0.6200|_ |0.0308| +| - bbh_fewshot_movie_recommendation |Yaml |none | 0|exact_match|0.6640|_ |0.0299| +| - bbh_fewshot_multistep_arithmetic_two |Yaml |none | 0|exact_match|0.0080|_ |0.0056| +| - bbh_fewshot_navigate |Yaml |none | 0|exact_match|0.6280|_ |0.0306| +| - bbh_fewshot_object_counting |Yaml |none | 0|exact_match|0.3960|_ |0.0310| +| - bbh_fewshot_penguins_in_a_table |Yaml |none | 0|exact_match|0.4726|_ |0.0415| +| - bbh_fewshot_reasoning_about_colored_objects |Yaml |none | 0|exact_match|0.5320|_ |0.0316| +| - bbh_fewshot_ruin_names |Yaml |none | 0|exact_match|0.5680|_ |0.0314| +| - bbh_fewshot_salient_translation_error_detection |Yaml |none | 0|exact_match|0.5480|_ |0.0315| +| - bbh_fewshot_snarks |Yaml |none | 0|exact_match|0.5169|_ |0.0376| +| - bbh_fewshot_sports_understanding |Yaml |none | 0|exact_match|0.8320|_ |0.0237| +| - bbh_fewshot_temporal_sequences |Yaml |none | 0|exact_match|0.5520|_ |0.0315| +| - bbh_fewshot_tracking_shuffled_objects_five_objects |Yaml |none | 0|exact_match|0.1480|_ |0.0225| +| - bbh_fewshot_tracking_shuffled_objects_seven_objects|Yaml |none | 0|exact_match|0.1720|_ |0.0239| +| - bbh_fewshot_tracking_shuffled_objects_three_objects|Yaml |none | 0|exact_match|0.2760|_ |0.0283| +| - bbh_fewshot_web_of_lies |Yaml |none | 0|exact_match|0.4760|_ |0.0316| +| - bbh_fewshot_word_sorting |Yaml |none | 0|exact_match|0.2840|_ |0.0286| + +| Groups |Version|Filter|n-shot| Metric |Value| |Stderr| +|-----------|-------|------|-----:|-----------|----:|---|-----:| +|bbh_fewshot|N/A |none | 0|exact_match|0.466|_ |0.1771| + +hf (pretrained=fblgit/UNA-SOLAR-10.7B-Instruct-v1.0), gen_kwargs: (None), limit: None, num_fewshot: 5, batch_size: auto (16) +| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr| +|---------------------------------------|-------|------|-----:|------|-----:|---|-----:| +|mmlu |N/A |none | 0|acc |0.6513|± |0.1221| +| 
- humanities |N/A |none | 5|acc |0.6077|± |0.1185| +| - formal_logic |Yaml |none | 5|acc |0.4444|± |0.0444| +| - high_school_european_history |Yaml |none | 5|acc |0.8121|± |0.0305| +| - high_school_us_history |Yaml |none | 5|acc |0.8431|± |0.0255| +| - high_school_world_history |Yaml |none | 5|acc |0.8523|± |0.0231| +| - international_law |Yaml |none | 5|acc |0.7851|± |0.0375| +| - jurisprudence |Yaml |none | 5|acc |0.7870|± |0.0396| +| - logical_fallacies |Yaml |none | 5|acc |0.7546|± |0.0338| +| - moral_disputes |Yaml |none | 5|acc |0.7370|± |0.0237| +| - moral_scenarios |Yaml |none | 5|acc |0.4101|± |0.0164| +| - philosophy |Yaml |none | 5|acc |0.7170|± |0.0256| +| - prehistory |Yaml |none | 5|acc |0.7840|± |0.0229| +| - professional_law |Yaml |none | 5|acc |0.4941|± |0.0128| +| - world_religions |Yaml |none | 5|acc |0.7895|± |0.0313| +| - other |N/A |none | 5|acc |0.7116|± |0.0939| +| - business_ethics |Yaml |none | 5|acc |0.7600|± |0.0429| +| - clinical_knowledge |Yaml |none | 5|acc |0.6792|± |0.0287| +| - college_medicine |Yaml |none | 5|acc |0.6590|± |0.0361| +| - global_facts |Yaml |none | 5|acc |0.3400|± |0.0476| +| - human_aging |Yaml |none | 5|acc |0.6816|± |0.0313| +| - management |Yaml |none | 5|acc |0.8350|± |0.0368| +| - marketing |Yaml |none | 5|acc |0.8547|± |0.0231| +| - medical_genetics |Yaml |none | 5|acc |0.7000|± |0.0461| +| - miscellaneous |Yaml |none | 5|acc |0.8020|± |0.0142| +| - nutrition |Yaml |none | 5|acc |0.7418|± |0.0251| +| - professional_accounting |Yaml |none | 5|acc |0.5071|± |0.0298| +| - professional_medicine |Yaml |none | 5|acc |0.7500|± |0.0263| +| - virology |Yaml |none | 5|acc |0.5843|± |0.0384| +| - social_sciences |N/A |none | 5|acc |0.7537|± |0.0681| +| - econometrics |Yaml |none | 5|acc |0.5000|± |0.0470| +| - high_school_geography |Yaml |none | 5|acc |0.8586|± |0.0248| +| - high_school_government_and_politics|Yaml |none | 5|acc |0.9016|± |0.0215| +| - high_school_macroeconomics |Yaml |none | 5|acc |0.6615|± |0.0240| +| 
- high_school_microeconomics |Yaml |none | 5|acc |0.7311|± |0.0288| +| - high_school_psychology |Yaml |none | 5|acc |0.8404|± |0.0157| +| - human_sexuality |Yaml |none | 5|acc |0.7328|± |0.0388| +| - professional_psychology |Yaml |none | 5|acc |0.6814|± |0.0189| +| - public_relations |Yaml |none | 5|acc |0.6909|± |0.0443| +| - security_studies |Yaml |none | 5|acc |0.7469|± |0.0278| +| - sociology |Yaml |none | 5|acc |0.8308|± |0.0265| +| - us_foreign_policy |Yaml |none | 5|acc |0.8900|± |0.0314| +| - stem |N/A |none | 5|acc |0.5569|± |0.1380| +| - abstract_algebra |Yaml |none | 5|acc |0.4100|± |0.0494| +| - anatomy |Yaml |none | 5|acc |0.6222|± |0.0419| +| - astronomy |Yaml |none | 5|acc |0.7368|± |0.0358| +| - college_biology |Yaml |none | 5|acc |0.8056|± |0.0331| +| - college_chemistry |Yaml |none | 5|acc |0.4700|± |0.0502| +| - college_computer_science |Yaml |none | 5|acc |0.5100|± |0.0502| +| - college_mathematics |Yaml |none | 5|acc |0.2800|± |0.0451| +| - college_physics |Yaml |none | 5|acc |0.3431|± |0.0472| +| - computer_security |Yaml |none | 5|acc |0.7400|± |0.0441| +| - conceptual_physics |Yaml |none | 5|acc |0.6340|± |0.0315| +| - electrical_engineering |Yaml |none | 5|acc |0.6000|± |0.0408| +| - elementary_mathematics |Yaml |none | 5|acc |0.4815|± |0.0257| +| - high_school_biology |Yaml |none | 5|acc |0.8032|± |0.0226| +| - high_school_chemistry |Yaml |none | 5|acc |0.4877|± |0.0352| +| - high_school_computer_science |Yaml |none | 5|acc |0.7200|± |0.0451| +| - high_school_mathematics |Yaml |none | 5|acc |0.3815|± |0.0296| +| - high_school_physics |Yaml |none | 5|acc |0.3576|± |0.0391| +| - high_school_statistics |Yaml |none | 5|acc |0.5602|± |0.0339| +| - machine_learning |Yaml |none | 5|acc |0.4643|± |0.0473| + +| Groups |Version|Filter|n-shot|Metric|Value | |Stderr| +|------------------|-------|------|-----:|------|-----:|---|-----:| +|mmlu |N/A |none | 0|acc |0.6513|± |0.1221| +| - humanities |N/A |none | 5|acc |0.6077|± |0.1185| +| - other |N/A 
|none | 5|acc |0.7116|± |0.0939| +| - social_sciences|N/A |none | 5|acc |0.7537|± |0.0681| +| - stem |N/A |none | 5|acc |0.5569|± |0.1380| +``` + + +## Citations + +Thanks to [Upstage.AI](https://huggingface.co/upstage) for its awesome base model; this is merely a UNA of it. It can only refine what is already in there :) + +If you find UNA-SOLAR useful, cite and support the authors. \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5cffbed --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ +{ + "_name_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "pad_token_id": 2, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.35.2", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..816b376 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 2, + "transformers_version": "4.35.2", + "use_cache": false +} diff --git a/ggml-model-q5_k_m.gguf b/ggml-model-q5_k_m.gguf new file mode 100644 index 0000000..3b3d1fc --- /dev/null +++ b/ggml-model-q5_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d0ec88b1a8b2d7fd69984c461ab82ee7248759b717ca9abf6d1c02478753ed7 +size 7597931040 diff --git a/ggml-model-q6_k.gguf b/ggml-model-q6_k.gguf new file mode 100644 index 0000000..8212193 --- /dev/null +++ b/ggml-model-q6_k.gguf @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0013d6c1445e7cac8dccd7b6a53552fde932869d53740ed5d8078250b464b434 +size 8805210656 diff --git a/model-00001-of-00005.safetensors b/model-00001-of-00005.safetensors new file mode 100644 index 0000000..32356c3 --- /dev/null +++ b/model-00001-of-00005.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2a514c8415e910e577a5292d5c63f38f33d2f52f2c4a2f3efb6bcb34207a962 +size 4943162240 diff --git a/model-00002-of-00005.safetensors b/model-00002-of-00005.safetensors new file mode 100644 index 0000000..6e62a6b --- /dev/null +++ b/model-00002-of-00005.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d23135edc37a49f3084d521620f43132991d5d206f68a4d606779fa00b5044e0 +size 4999819232 diff --git a/model-00003-of-00005.safetensors b/model-00003-of-00005.safetensors new file mode 100644 index 0000000..c625cc5 --- /dev/null +++ b/model-00003-of-00005.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a09311ba908903f3a67493e6e683a3ac67aaaeaf36f53a0ac88c52ae71dd9729 +size 4915916080 diff --git a/model-00004-of-00005.safetensors b/model-00004-of-00005.safetensors new file mode 100644 index 0000000..04afdcb --- /dev/null +++ b/model-00004-of-00005.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3109239ef45fad22af14fc864a56698057a401cec0e5fc2525f804102e61c542 +size 4915916080 diff --git a/model-00005-of-00005.safetensors b/model-00005-of-00005.safetensors new file mode 100644 index 0000000..a494a6d --- /dev/null +++ b/model-00005-of-00005.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db0df66382c06f3280a54a6426cd28025a6d424fd20cb46e9df9e08d74a0a912 +size 1688284744 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..d81a244 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,442 @@ +{ + 
"metadata": { + "total_size": 21463048192 + }, + "weight_map": { + "lm_head.weight": "model-00005-of-00005.safetensors", + "model.embed_tokens.weight": "model-00001-of-00005.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.10.post_attention_layernorm.weight": 
"model-00002-of-00005.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + 
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.15.self_attn.v_proj.weight": 
"model-00002-of-00005.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + 
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + 
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00005.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00005.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.23.mlp.down_proj.weight": 
"model-00003-of-00005.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + 
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.28.self_attn.k_proj.weight": 
"model-00003-of-00005.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.30.mlp.up_proj.weight": 
"model-00003-of-00005.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.input_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00005.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.33.input_layernorm.weight": "model-00004-of-00005.safetensors", + 
"model.layers.33.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00005.safetensors", + "model.layers.34.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.35.self_attn.q_proj.weight": 
"model-00004-of-00005.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + 
"model.layers.38.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.40.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", 
+ "model.layers.40.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.42.input_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.43.input_layernorm.weight": 
"model-00004-of-00005.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00004-of-00005.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.44.input_layernorm.weight": "model-00005-of-00005.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00005-of-00005.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00004-of-00005.safetensors", + "model.layers.45.input_layernorm.weight": "model-00005-of-00005.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00005-of-00005.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00005-of-00005.safetensors", + 
"model.layers.45.self_attn.q_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.46.input_layernorm.weight": "model-00005-of-00005.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00005-of-00005.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.47.input_layernorm.weight": "model-00005-of-00005.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00005-of-00005.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00005-of-00005.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.post_attention_layernorm.weight": 
"model-00001-of-00005.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.8.mlp.gate_proj.weight": 
"model-00001-of-00005.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00005.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00005.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00005.safetensors", + "model.norm.weight": "model-00005-of-00005.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..492d4b2 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model 
b/tokenizer.model new file mode 100644 index 0000000..8b443ef --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..47a9eea --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,43 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n' + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": true +}