初始化项目，由ModelHub XC社区提供模型

Model: ghost-x/ghost-7b-v0.9.1 Source: Original Platform
2026-04-25 13:32:00 +08:00
commit 4a20cb1143
17 changed files with 91975 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,38 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 model-q4_0.gguf filter=lfs diff=lfs merge=lfs -text
 model.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
 model.Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,369 @@
 ---
 language:
 - en
 - vi
 license: mit
 library_name: transformers
 tags:
 - ghost
 pipeline_tag: text-generation
 model-index:
 - name: ghost-7b-v0.9.1
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: AI2 Reasoning Challenge (25-Shot)
      type: ai2_arc
      config: ARC-Challenge
      split: test
      args:
        num_few_shot: 25
    metrics:
    - type: acc_norm
      value: 55.38
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=lamhieu/ghost-7b-v0.9.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: HellaSwag (10-Shot)
      type: hellaswag
      split: validation
      args:
        num_few_shot: 10
    metrics:
    - type: acc_norm
      value: 77.03
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=lamhieu/ghost-7b-v0.9.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU (5-Shot)
      type: cais/mmlu
      config: all
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 54.78
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=lamhieu/ghost-7b-v0.9.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: TruthfulQA (0-shot)
      type: truthful_qa
      config: multiple_choice
      split: validation
      args:
        num_few_shot: 0
    metrics:
    - type: mc2
      value: 43.96
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=lamhieu/ghost-7b-v0.9.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Winogrande (5-shot)
      type: winogrande
      config: winogrande_xl
      split: validation
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 72.53
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=lamhieu/ghost-7b-v0.9.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GSM8k (5-shot)
      type: gsm8k
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 26.91
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=lamhieu/ghost-7b-v0.9.1
      name: Open LLM Leaderboard
 widget:
 - text: 'How many helicopters can a human eat in one sitting'
  output:
    text: "Ahoy, me matey! A human can eat approximately one helicopter in one sitting, but only if they're a giant sea monster with a stomach the size of a small country. 🤢🤢 So, it's not advisable to try this, pirate! 🏰🛢️"
 ---
 # Ghost 7B v0.9.1
 <img src="https://tjzk.replicate.delivery/models_models_cover_image/7501431e-8f99-4b75-86bc-0bcc68c920bf/openart-image_JB8EpEBU_1710680733.jpg" alt="Ghost 7B v0.9.1 Logo" width="400" style="margin-left: auto; margin-right: auto; display: block;"/>
 **Ghost 7B, v0.9.1, flying**
 An early release version of the **Ghost 7B Alpha** model.
 The next generation of large language models focuses on optimization for excellent reasoning and multi-task knowledge.
 [▶️ Experience it on Colab](https://tinyurl.com/ghost7b091)
 In addition, the model is also available in [GGUF](https://huggingface.co/lamhieu/ghost-7b-v0.9.1-gguf) and [AWQ](https://huggingface.co/lamhieu/ghost-7b-v0.9.1-awq) versions.
 ### Come on, create yourself an AI assistant, according to your wishes!
 In your language, maybe Vietnamese.
 <img src="https://cdn-uploads.huggingface.co/production/uploads/600ae38cc92b79f54efd4556/_4EmivXdOYjQpBVpIO9WL.png" width="600" align="center" />
 Or, English.
 <img src="https://cdn-uploads.huggingface.co/production/uploads/600ae38cc92b79f54efd4556/ctmTOz5V7pHm0FnX8c6BD.png" width="600" align="center" />
 ### Let the assistant become an expert, and more.
 Challenge the model's ability to understand the language.
 <img src="https://cdn-uploads.huggingface.co/production/uploads/600ae38cc92b79f54efd4556/N0RJUFFf1t8QRg8AVyxNj.png" width="600" align="center" />
 Challenge the model's reasoning ability, in Vietnamese language.
 <img src="https://cdn-uploads.huggingface.co/production/uploads/600ae38cc92b79f54efd4556/KUXjV2XJK5vNy7genVtfN.png" width="600" align="center" />
 <img src="https://cdn-uploads.huggingface.co/production/uploads/600ae38cc92b79f54efd4556/ngX6unqUNnnBGq4R1gYY2.png" width="600" align="center" />
 Cases where the Vietnamese input lacks accents, or uses abbreviations or slang.
 <img src="https://cdn-uploads.huggingface.co/production/uploads/600ae38cc92b79f54efd4556/xSL8WErn5girbKxUbEOsh.png" width="600" align="center" />
 <img src="https://cdn-uploads.huggingface.co/production/uploads/600ae38cc92b79f54efd4556/-IXPjLL_QGb_5frOKftUW.png" width="600" align="center" />
 ## 📚 Model Details
 ### Model Description
 This version explores how well the model can understand and generate a language other than the one it was originally trained on, in this case Vietnamese. In short, training the **Mistral 7B** model on a new language proved to be both highly effective and low cost.
 I trained the [Ghost 7B v0.9.0](https://huggingface.co/lamhieu/ghost-7b-v0.9.0) model again, this time with a smaller amount of data, estimated at only about 150MB. About 70% of that data is Vietnamese; the rest is almost entirely English.
 The approach here uses QLoRA for training and then merges the resulting weights. Also, I am very thankful to Unsloth for their features.
 ## ⛹️‍♂️ Uses
 ### Online using Google Colab
 To make it easier to play around with the model, I created a notebook in [Google Colab](https://tinyurl.com/ghost7b091) so you can start experimenting.
 ### Directly
 For direct use, you can easily get started with the following steps.
 * Firstly, you need to install **transformers** via the command below with `pip`.
  ```bash
  pip install -U transformers
  ```
 * Right now, you can start using the model directly.
  ```python
  import torch
  from transformers import (
      AutoModelForCausalLM,
      AutoTokenizer,
  )
  base_model = "lamhieu/ghost-7b-v0.9.1"
  model = AutoModelForCausalLM.from_pretrained(
      base_model,
      torch_dtype=torch.bfloat16,
      trust_remote_code=True,
      device_map="auto",
  )
  tokenizer = AutoTokenizer.from_pretrained(base_model)
  messages = [
      {"role": "system", "content": "You are a friendly chatbot who always responds in the style of a pirate"},
      {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
  ]
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
  tokenized = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
  outputs = model.generate(**tokenized, max_new_tokens=512)
  results = tokenizer.batch_decode(outputs)[0]
  print(results)
  ```
 * Additionally, you can use the model with **4-bit quantization** to minimize the required resources. You can start with the code below.
  ```python
  import torch
  from transformers import (
      AutoModelForCausalLM,
      AutoTokenizer,
      BitsAndBytesConfig,
  )
  base_model = "lamhieu/ghost-7b-v0.9.1"
  bnb_config = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_compute_dtype=torch.bfloat16,
      bnb_4bit_use_double_quant=False,
  )
  model = AutoModelForCausalLM.from_pretrained(
      base_model,
      quantization_config=bnb_config,
      trust_remote_code=True,
      device_map="auto",
  )
  tokenizer = AutoTokenizer.from_pretrained(base_model)
  messages = [
      {"role": "system", "content": "You are a friendly chatbot who always responds in the style of a pirate"},
      {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
  ]
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
  tokenized = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
  outputs = model.generate(**tokenized, max_new_tokens=512)
  results = tokenizer.batch_decode(outputs)[0]
  print(results)
  ```
 ### Summary
 Although the amount of training data is small, it is "great". You don't need to worry too much that it won't be able to meet some of your requirements. Instead, try experimenting with the model on whatever you want.
 One more thing: use it like you would **ChatGPT**. I've purposely tweaked it to be able to replace my app (for some tasks, and it does a good job). It works well with both Vietnamese and English. It would be great to hear feedback about your experience; feel free to leave it in the discussion section.
 Setting up the system prompt has a great impact on the performance and quality of the content generated by the model. Keep this in mind so that the model is always used for your intended purpose and achieves good results.
 It's best to always include the system prompt; you can leave it empty if you have no specific instructions to set.
 ## 🥇 Evaluation
 ### [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
 Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_lamhieu__ghost-7b-v0.9.1)
 |             Metric              |Value|
 |---------------------------------|----:|
 |Avg.                             |55.10|
 |AI2 Reasoning Challenge (25-Shot)|55.38|
 |HellaSwag (10-Shot)              |77.03|
 |MMLU (5-Shot)                    |54.78|
 |TruthfulQA (0-shot)              |43.96|
 |Winogrande (5-shot)              |72.53|
 |GSM8k (5-shot)                   |26.91|
 ### VMLU
 A Vietnamese Multitask Language Understanding Benchmark Suite for Large Language Models.
 With the score achieved, the model can rank **3rd** in VMLU's "Leaderboard of fine-tuned models" list, as of the date of evaluation.
 ![image/png](https://cdn-uploads.huggingface.co/production/uploads/600ae38cc92b79f54efd4556/yuDiym9y_o_tlRVr90pGX.png)
 <details>
  <summary>Details</summary>
 ```json
 {
  "humanity": {
    "administrative_law": 52.22,
    "business_law": 40.22,
    "civil_law": 46.11,
    "criminal_law": 49.08,
    "economic_law": 39.75,
    "education_law": 42.17,
    "elementary_history": 55.37,
    "high_school_history": 36.67,
    "high_school_literature": 37.78,
    "history_of_world_civilization": 46.67,
    "idealogical_and_moral_cultivation": 50,
    "introduction_to_laws": 45.24,
    "vietnamese_language_and_literature": 34.48,
    "total": 43.3,
    "revolutionary_policy_of_the_vietnamese_commununist_part": 51.11,
    "introduction_to_vietnam_culture": 30.56,
    "logic": 27.01,
    "middle_school_history": 44.44,
    "middle_school_literature": 50.57
  },
  "stem": {
    "total": 34.73,
    "applied_informatics": 50.56,
    "computer_architecture": 33.89,
    "computer_network": 43.02,
    "discrete_mathematics": 31.52,
    "electrical_engineering": 30.68,
    "elementary_mathematics": 30,
    "elementary_science": 58.89,
    "high_school_biology": 38.33,
    "high_school_chemistry": 28.89,
    "high_school_mathematics": 26.35,
    "high_school_physics": 29.44,
    "introduction_to_chemistry": 27.37,
    "introduction_to_physics": 31.79,
    "introduction_to_programming": 36.31,
    "metrology_engineer": 31.21,
    "middle_school_biology": 46.47,
    "middle_school_chemistry": 30.56,
    "middle_school_mathematics": 30.56,
    "middle_school_physics": 30,
    "operating_system": 40.56,
    "statistics_and_probability": 22.99
  },
  "total": 39.58,
  "other": {
    "accountant": 31.55,
    "civil_servant": 42.11,
    "clinical_pharmacology": 33.89,
    "driving_license_certificate": 59.06,
    "environmental_engineering": 28.07,
    "internal_basic_medicine": 39.77,
    "preschool_pedagogy": 46.08,
    "tax_accountant": 22.41,
    "tax_civil_servant": 47.95,
    "total": 38.99
  },
  "social_science": {
    "business_administration": 41.38,
    "high_school_civil_education": 45,
    "high_school_geography": 34.57,
    "ho_chi_minh_ideology": 48.04,
    "macroeconomics": 31.11,
    "microeconomics": 37.22,
    "middle_school_civil_education": 66.29,
    "middle_school_geography": 48.3,
    "principles_of_marxism_and_leninism": 30,
    "sociology": 53.93,
    "total": 43.58
  }
 }
 ```
 </details>
 ## 📜 More Information
 Note, this is a personal research project with a limited budget, so the model only stops at the evaluation level with the developed approach. Apart from that, I think I can definitely build a model with better quality in terms of language and other performance using this approach.
 ### Thanks for the support
 Model trained with **Unsloth**, many thanks.
 <img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/made%20with%20unsloth.png" width="200px" align="center" />
 ## 📨 Model Card Contact
 **Lam Hieu** (lamhieu.vk@gmail.com)
--- a/config.json
+++ b/config.json
@@ -0,0 +1,27 @@
 {
  "_name_or_path": "lamhieu/ghost-7b-v0.9.1",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 32768,
  "model_type": "mistral",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pad_token_id": 2,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.37.0",
  "use_cache": true,
  "vocab_size": 32000
 }
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,6 @@
 {
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.37.0"
 }
--- a/model-00001-of-00008.safetensors
+++ b/model-00001-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:94397a9ed24f868e6368a0a88f83befad89b125fb2180fc45d28feec6d9f0088
 size 1889587040
--- a/model-00002-of-00008.safetensors
+++ b/model-00002-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:3a118b85b560f959af57fc973f4d1e6d448cb1fe04db4bae21999ee4bcbe0478
 size 1946243936
--- a/model-00003-of-00008.safetensors
+++ b/model-00003-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:3c34d0150a8ada3d40643392710798716f9b6c7285eb983ecbf6f0eede96ca89
 size 1979781432
--- a/model-00004-of-00008.safetensors
+++ b/model-00004-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:529501cb935b5b008bb789ffd2f781902599046dcf7007188434164c842ff832
 size 1946243984
--- a/model-00005-of-00008.safetensors
+++ b/model-00005-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:48501a0ba4df48c70b5369c0c384b9058e2bdf7863fece2364885325cd949103
 size 1979781448
--- a/model-00006-of-00008.safetensors
+++ b/model-00006-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:a83387af5283625571a907612ec0db4fea709d04840132ba28230a215e05a2e7
 size 1946243984
--- a/model-00007-of-00008.safetensors
+++ b/model-00007-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:7253786e5ecdbd12215900f9f0c22a287e1bde20eb24dfe1667f5698df65f21b
 size 1979781448
--- a/model-00008-of-00008.safetensors
+++ b/model-00008-of-00008.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:0243df5c73b55128ad66c2cdeaf94b3e38d923f5ce9a4dfe0cc4900bf4464c28
 size 815834680
--- a/model.safetensors.index.json
+++ b/model.safetensors.index.json
@@ -0,0 +1,298 @@
 {
  "metadata": {
    "total_size": 14483464192
  },
  "weight_map": {
    "lm_head.weight": "model-00008-of-00008.safetensors",
    "model.embed_tokens.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.input_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.input_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.10.input_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.input_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.input_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.12.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.13.input_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.input_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.input_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.input_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.17.input_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.17.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.17.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.17.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
    "model.layers.18.input_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.18.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.input_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.19.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.2.input_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.20.input_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.20.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.input_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.21.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
    "model.layers.22.input_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.22.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.input_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.23.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.input_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.24.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.input_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.25.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.26.input_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.26.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.26.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.26.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.26.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
    "model.layers.27.input_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.27.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.input_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.28.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.input_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.29.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.3.input_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.3.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
    "model.layers.30.input_layernorm.weight": "model-00008-of-00008.safetensors",
    "model.layers.30.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.30.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.30.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.30.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
    "model.layers.30.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.30.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.30.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.30.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
    "model.layers.31.input_layernorm.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.mlp.gate_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.mlp.up_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.self_attn.k_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.self_attn.o_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.self_attn.q_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.31.self_attn.v_proj.weight": "model-00008-of-00008.safetensors",
    "model.layers.4.input_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.input_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.input_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.input_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.8.input_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.8.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.8.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
    "model.layers.9.input_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
    "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
    "model.norm.weight": "model-00008-of-00008.safetensors"
  }
 }
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,35 @@
 {
  "additional_special_tokens": [
    "<unk>",
    "<s>",
    "</s>"
  ],
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer.model
+++ b/tokenizer.model
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,53 @@
 {
  "add_bos_token": true,
  "add_eos_token": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<unk>",
    "<s>",
    "</s>"
  ],
  "bos_token": "<s>",
  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": true,
  "max_length": 4096,
  "model_max_length": 32768,
  "pad_to_multiple_of": null,
  "pad_token": "</s>",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "stride": 0,
  "tokenizer_class": "LlamaTokenizer",
  "truncation_side": "left",
  "unk_token": "<unk>",
  "use_default_system_prompt": true
 }