From eee02ab4dfb72e11b5c5d373c89c212b050c0d66 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Fri, 22 May 2026 02:44:17 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: davidkim205/komt-mistral-7b-v1 Source: Original Platform --- .gitattributes | 35 ++++ README.md | 121 +++++++++++++ added_tokens.json | 5 + config.json | 25 +++ generation_config.json | 6 + pytorch_model-00001-of-00002.bin | 3 + pytorch_model-00002-of-00002.bin | 3 + pytorch_model.bin.index.json | 298 +++++++++++++++++++++++++++++++ special_tokens_map.json | 11 ++ tokenizer.model | 3 + tokenizer_config.json | 49 +++++ 11 files changed, 559 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 added_tokens.json create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 pytorch_model-00001-of-00002.bin create mode 100644 pytorch_model-00002-of-00002.bin create mode 100644 pytorch_model.bin.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..57fb5df --- /dev/null +++ b/README.md @@ -0,0 +1,121 @@ +--- +language: +- en +- ko +pipeline_tag: text-generation +tags: +- finetuned + +--- +# komt : korean multi task instruction tuning model +![multi task instruction tuning.jpg](https://github.com/davidkim205/komt/assets/16680469/c7f6ade7-247e-4b62-a94f-47e19abea68e) + +Recently, due to the success of ChatGPT, numerous large language models have emerged in an attempt to catch up with ChatGPT's capabilities. +However, when it comes to Korean language performance, it has been observed that many models still struggle to provide accurate answers or generate Korean text effectively. +This study addresses these challenges by introducing a multi-task instruction technique that leverages supervised datasets from various tasks to create training data for Large Language Models (LLMs). + +## Model Details + +* **Model Developers** : davidkim(changyeon kim) +* **Repository** : https://github.com/davidkim205/komt +* **Model Architecture** : The komt-mistral-7b-v1 is is a fine-tuned version of the Mistral-7B-Instruct-v0.1. + +## Dataset +korean multi-task instruction dataset + +## Hardware and Software +- nvidia driver : 535.54.03 +- CUDA Version: 12.2 + +## Training +Refer https://github.com/davidkim205/komt + + +## Prompt template: Mistral +``` +[INST] {prompt} [/INST] +``` + +## Usage + + +``` +from transformers import AutoTokenizer, AutoModelForCausalLM +from transformers import TextStreamer, GenerationConfig + +model_name='davidkim205/komt-mistral-7b-v1' +model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto") +tokenizer = AutoTokenizer.from_pretrained(model_name) +streamer = TextStreamer(tokenizer) + +def gen(x): + generation_config = GenerationConfig( + temperature=0.8, + top_p=0.8, + top_k=100, + max_new_tokens=1024, + early_stopping=True, + do_sample=True, + ) + q = f"[INST]{x} [/INST]" + gened = model.generate( + **tokenizer( + q, + return_tensors='pt', + return_token_type_ids=False + ).to('cuda'), + generation_config=generation_config, + pad_token_id=tokenizer.eos_token_id, + eos_token_id=tokenizer.eos_token_id, + streamer=streamer, + ) + result_str = tokenizer.decode(gened[0]) + + start_tag = f"\n\n### Response: " + start_index = result_str.find(start_tag) + + if start_index != -1: + result_str = result_str[start_index + len(start_tag):].strip() + return result_str + +print(gen('제주도를 1박2일로 혼자 여행하려고 하는데 여행 코스를 만들어줘')) +``` +output +``` + [INST]제주도를 1박2일로 혼자 여행하려고 하는데 여행 코스를 만들어줘 [/INST]1박 2일 여행 코스 +제주 여행 코스는 다음과 같습니다: + +첫째 날: +* 아침: 제주시 신라로 이동하여 한라산 국립공원에 도착합니다. 오른쪽 발자국 광장에서 눈에 띄는 맥주숍을 찾아 한 병의 맥주를 마시고 계단으로 올라가 한라산 정상에 도착합니다. +* 점심: 정상에서 계단을 내려와 중도에 위치한 골목에서 음식을 즐깁니다. 젓갈, 곰탕, 전복 등 맛있는 제주 특산물을 맛보며 힐링을 즐깁니다. +* 오후: 중도에서 남하하여 남서쪽 제주로 이동합니다. 남서쪽 제주는 섬의 자연 경관과 백사장, 아름다운 바다 등 아름다운 풍광이 있는 곳입니다. 숙소에 숙박합니다. + +둘째 날: +* 아침: 남서쪽 제주에서 북서쪽 제주로 이동합니다. 이 지역은 푸른 잔디 해변과 투명한 바다가 펼쳐져 있는 아름다운 풍광입니다. 숙소에서 아침을 먹고 바다로 향하여 해변에서 휴식을 취합니다. +* 점심: 북서쪽 제주의 앞바다에서 시원한 바다를 보며 한 끼의 해산물을 맛보고 계절에 따라 해산물 요리를 즐깁니다. +* 오후: 남서쪽 제주에서 이동하여 서쪽 제주로 이동합니다. 서쪽 제주는 절벽과 암석 해변, 수수께끼의 돌 정원 등 독특한 경관이 있는 곳입니다. 이곳에서는 서쪽 제주의 대표적인 명소인 청량사를 방문하고 서쪽 제주의 아름다운 풍광을 감상합니다. +* 저녁: 서쪽 제주에서 제주 시로 이동하여 한 끼의 제주 특산물을 맛보고 도착한 제주 도심에서 저녁을 즐깁니다. +* 야간: 제주 시의 도심에서 야간 활동을 즐기며 1박 2일의 여행을 마무리합니다. + +이렇게 제주를 1박 2일로 혼자 여행하면 제주의 아름다운 풍광, 푸른 잔디 해변, 투명한 바다 등을 경험할 수 있습니다. +``` +## Evaluation +For objective model evaluation, we initially used EleutherAI's lm-evaluation-harness but obtained unsatisfactory results. Consequently, we conducted evaluations using ChatGPT, a widely used model, as described in [Self-Alignment with Instruction Backtranslation](https://arxiv.org/pdf/2308.06502.pdf) and [Three Ways of Using Large Language Models to Evaluate Chat](https://arxiv.org/pdf/2308.06259.pdf) . + + +| model | score | average(0~5) | percentage | +| --------------------------------------- |---------| ------------ | ---------- | +| gpt-3.5-turbo(close) | 147 | 3.97 | 79.45% | +| naver Cue(close) | 140 | 3.78 | 75.67% | +| clova X(close) | 136 | 3.67 | 73.51% | +| WizardLM-13B-V1.2(open) | 96 | 2.59 | 51.89% | +| Llama-2-7b-chat-hf(open) | 67 | 1.81 | 36.21% | +| Llama-2-13b-chat-hf(open) | 73 | 1.91 | 38.37% | +| nlpai-lab/kullm-polyglot-12.8b-v2(open) | 70 | 1.89 | 37.83% | +| kfkas/Llama-2-ko-7b-Chat(open) | 96 | 2.59 | 51.89% | +| beomi/KoAlpaca-Polyglot-12.8B(open) | 100 | 2.70 | 54.05% | +| **komt-llama2-7b-v1 (open)(ours)** | **117** | **3.16** | **63.24%** | +| **komt-llama2-13b-v1 (open)(ours)** | **129** | **3.48** | **69.72%** | +| **komt-llama-30b-v1 (open)(ours)** | **129** | **3.16** | **63.24%** | +| **komt-mistral-7b-v1 (open)(ours)** | **131** | **3.54** | **70.81%** | + diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..cbce74e --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,5 @@ +{ + "": 2, + "": 1, + "": 0 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000..8452e74 --- /dev/null +++ b/config.json @@ -0,0 +1,25 @@ +{ + "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.1", + "architectures": [ + "MistralForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mistral", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.34.0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..7327bd3 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.34.0" +} diff --git a/pytorch_model-00001-of-00002.bin b/pytorch_model-00001-of-00002.bin new file mode 100644 index 0000000..33f6393 --- /dev/null +++ b/pytorch_model-00001-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:306af06eb8d98cd1d2496ea909c72d1f78ee60b61ccb4a200b466d0042a2cb91 +size 9943028531 diff --git a/pytorch_model-00002-of-00002.bin b/pytorch_model-00002-of-00002.bin new file mode 100644 index 0000000..d6256ae --- /dev/null +++ b/pytorch_model-00002-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb267a8dbde638904abfa12cf35a0088ae7177c8199b21021b1b780f4b58ab5 +size 4540536134 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000..520ee2f --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 14483464192 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00002-of-00002.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.norm.weight": "pytorch_model-00002-of-00002.bin" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..8cd5f1e --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,11 @@ +{ + "additional_special_tokens": [ + "", + "", + "" + ], + "bos_token": "", + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..8b443ef --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..628e993 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,49 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1024, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "tokenizer_file": "/home/dev/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.1/snapshots/7ad5799710574ba1c1d953eba3077af582f3a773/tokenizer.json", + "unk_token": "", + "use_default_system_prompt": true +}