初始化项目，由ModelHub XC社区提供模型

Model: Bingsu/exaone-3.0-7.8b-it Source: Original Platform
2026-05-11 17:36:03 +08:00
commit 6da7c3d370
20 changed files with 313275 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.gguf filter=lfs diff=lfs merge=lfs -text
--- a/LICENSE
+++ b/LICENSE
@@ -0,0 +1,162 @@
+EXAONE AI Model License Agreement 1.1 - NC
+
+This License Agreement (“Agreement”) is entered into between you (“Licensee”) and LG Management Development 
+Institute Co., Ltd. (“Licensor”), governing the use of the EXAONE AI Model (“Model”). By downloading, 
+installing, copying, or using the Model, you agree to comply with and be bound by the terms of this Agreement.
+If you do not agree to all the terms, you must not download, install, copy, or use the Model. This Agreement 
+constitutes a binding legal agreement between the Licensee and Licensor.
+
+1. Definitions
+    1.1 Model: The artificial intelligence model provided by Licensor, which includes any software, 
+    algorithms, machine learning models, or related components supplied by Licensor. This definition extends 
+    to encompass all updates, enhancements, improvements, bug fixes, patches, or other modifications that may 
+    be provided by Licensor from time to time, whether automatically or manually implemented.
+    1.2 Derivatives: Any modifications, alterations, enhancements, improvements, adaptations, or derivative 
+    works of the Model created by Licensee or any third party. This includes changes made to the Model's 
+    architecture, parameters, data processing methods, or any other aspect of the Model that results in a 
+    modification of its functionality or output.
+    1.3 Output: Any data, results, content, predictions, analyses, insights, or other materials generated by 
+    the Model or Derivatives, regardless of whether they are in their original form or have been further 
+    processed or modified by the Licensee. This includes, but is not limited to, textual or numerical produced 
+    directly or indirectly through the use of the Model.
+    1.4 Licensor: LG Management Development Institute Co., Ltd., the owner, developer, and provider of the 
+    EXAONE AI Model. The Licensor holds all rights, title, and interest in the Model and is responsible for 
+    granting licenses to use the Model under the terms specified in this Agreement.
+    1.5 Licensee: The individual, organization, corporation, academic institution, government agency, or other 
+    entity using or intending to use the Model under the terms and conditions of this Agreement. The Licensee 
+    is responsible for ensuring compliance with the Agreement by all authorized users who access or utilize 
+    the Model on behalf of the Licensee.
+
+2. License Grant
+    2.1 Grant of License: Subject to the terms and conditions outlined in this Agreement, the Licensor hereby 
+    grants the Licensee a limited, non-exclusive, non-transferable, worldwide, and revocable license to:
+        a. Access, download, install, and use the Model solely for research purposes. This includes 
+        evaluation, testing, academic research, experimentation, and participation in competitions, provided 
+        that such participation is in a non-commercial context. Notwithstanding Section 3.1, the Licensee may 
+        only provide the Model or Derivatives for a competition if no commercial license is granted to the 
+        competition organizer or any third party.
+        b. Publicly disclose research results and findings derived from the use of the Model or Derivatives, 
+        including publishing papers or presentations.
+        c. Modify the Model and create Derivatives based on the Model, provided that such modifications and 
+        Derivatives are used exclusively for research purposes. The Licensee may conduct experiments, perform 
+        analyses, and apply custom modifications to the Model to explore its capabilities and performance 
+        under various scenarios. If the Model is modified, the modified Model must include “EXAONE” at the 
+        beginning of its name.
+        d. Distribute the Model and Derivatives in each case with a copy of this Agreement.
+    2.2 Scope of License: The license granted herein does not authorize the Licensee to use the Model for any 
+    purpose not explicitly permitted under this Agreement. Any use beyond the scope of this license, including 
+    any commercial application or external distribution, is strictly prohibited unless explicitly agreed upon 
+    in writing by the Licensor.
+
+3. Restrictions
+    3.1 Commercial Use: The Licensee is expressly prohibited from using the Model, Derivatives, or Output for 
+    any commercial purposes, including but not limited to, developing or deploying products, services, or 
+    applications that generate revenue, whether directly or indirectly. Any commercial exploitation of the 
+    Model or its derivatives requires a separate commercial license agreement with the Licensor. Furthermore, 
+    the Licensee shall not use the Model, Derivatives or Output to develop or improve other models.
+    3.2 Reverse Engineering: The Licensee shall not decompile, disassemble, reverse engineer, or attempt to 
+    derive the source code, underlying ideas, algorithms, or structure of the Model, except to the extent that 
+    such activities are expressly permitted by applicable law. Any attempt to bypass or circumvent 
+    technological protection measures applied to the Model is strictly prohibited.
+    3.3 Unlawful Use: The Licensee shall not use the Model and Derivatives for any illegal, fraudulent, or 
+    unauthorized activities, nor for any purpose that violates applicable laws or regulations. This includes 
+    but is not limited to the creation, distribution, or dissemination of malicious, deceptive, or unlawful 
+    content.
+    3.4 Ethical Use: The Licensee shall ensure that the Model or Derivatives is used in an ethical and 
+    responsible manner, adhering to the following guidelines:
+        a. The Model and Derivatives shall not be used to generate, propagate, or amplify false, misleading, 
+        or harmful information, including fake news, misinformation, or disinformation.
+        b. The Model and Derivatives shall not be employed to create, distribute, or promote content that is 
+        discriminatory, harassing, defamatory, abusive, or otherwise offensive to individuals or groups based 
+        on race, gender, sexual orientation, religion, nationality, or other protected characteristics.
+        c. The Model and Derivatives shall not infringe on the rights of others, including intellectual 
+        property rights, privacy rights, or any other rights recognized by law. The Licensee shall obtain all 
+        necessary permissions and consents before using the Model and Derivatives in a manner that may impact 
+        the rights of third parties.
+        d. The Model and Derivatives shall not be used in a way that causes harm, whether physical, mental, 
+        emotional, or financial, to individuals, organizations, or communities. The Licensee shall take all 
+        reasonable measures to prevent misuse or abuse of the Model and Derivatives that could result in harm 
+        or injury.
+
+4. Ownership
+    4.1 Intellectual Property: All rights, title, and interest in and to the Model, including any 
+    modifications, Derivatives, and associated documentation, are and shall remain the exclusive property of 
+    the Licensor. The Licensee acknowledges that this Agreement does not transfer any ownership rights to the 
+    Licensee. All trademarks, service marks, and logos associated with the Model are the property of the 
+    Licensor.
+    4.2 Output: All rights, title, and interest in and to the Output generated by the Model and Derivatives 
+    whether in its original form or modified, are and shall remain the exclusive property of the Licensor.
+    Licensee may use, modify, and distribute the Output and its derivatives for research purpose. The Licensee 
+    shall not claim ownership of the Output except as expressly provided in this Agreement. The Licensee may 
+    use the Output solely for the purposes permitted under this Agreement and shall not exploit the Output for 
+    unauthorized or commercial purposes.
+    4.3 Attribution: In any publication or presentation of results obtained using the Model, the Licensee 
+    shall provide appropriate attribution to the Licensor, citing the Model's name and version, along with any 
+    relevant documentation or references specified by the Licensor.
+
+5. No Warranty
+    5.1 “As-Is” Basis: The Model, Derivatives, and Output are provided on an “as-is” and “as-available” basis, 
+    without any warranties or representations of any kind, whether express, implied, or statutory. The 
+    Licensor disclaims all warranties, including but not limited to, implied warranties of merchantability, 
+    fitness for a particular purpose, accuracy, reliability, non-infringement, or any warranty arising from 
+    the course of dealing or usage of trade.
+    5.2 Performance and Reliability: The Licensor does not warrant or guarantee that the Model, Derivatives or 
+    Output will meet the Licensee’s requirements, that the operation of the Model, Derivatives or Output will 
+    be uninterrupted or error-free, or that defects in the Model will be corrected. The Licensee acknowledges 
+    that the use of the Model, Derivatives or Output is at its own risk and that the Model, Derivatives or 
+    Output may contain bugs, errors, or other limitations.
+    5.3 No Endorsement: The Licensor does not endorse, approve, or certify any results, conclusions, or 
+    recommendations derived from the use of the Model. The Licensee is solely responsible for evaluating the 
+    accuracy, reliability, and suitability of the Model for its intended purposes.
+
+6. Limitation of Liability
+    6.1 No Liability for Damages: To the fullest extent permitted by applicable law, in no event shall the 
+    Licensor be liable for any special, incidental, indirect, consequential, exemplary, or punitive damages, 
+    including but not limited to, damages for loss of business profits, business interruption, loss of 
+    business information, loss of data, or any other pecuniary or non-pecuniary loss arising out of or in 
+    connection with the use or inability to use the Model, Derivatives or any Output, even if the Licensor has 
+    been advised of the possibility of such damages.
+    6.2 Indemnification: The Licensee agrees to indemnify, defend, and hold harmless the Licensor, its 
+    affiliates, officers, directors, employees, and agents from and against any claims, liabilities, damages, 
+    losses, costs, or expenses (including reasonable attorneys' fees) arising out of or related to the 
+    Licensee's use of the Model, any Derivatives, or any Output, including any violation of this Agreement or 
+    applicable laws.
+
+7. Termination
+    7.1 Termination by Licensor: The Licensor reserves the right to terminate this Agreement and revoke the 
+    Licensee’s rights to use the Model at any time, with or without cause, and without prior notice if the 
+    Licensee breaches any of the terms or conditions of this Agreement. Termination shall be effective 
+    immediately upon notice.
+    7.2 Effect of Termination: Upon termination of this Agreement, the Licensee must immediately cease all use 
+    of the Model, Derivatives, and Output and destroy all copies of the Model, Derivatives, and Output in its 
+    possession or control, including any backup or archival copies. The Licensee shall certify in writing to 
+    the Licensor that such destruction has been completed.
+    7.3 Survival: The provisions of this Agreement that by their nature should survive termination, including 
+    but not limited to, Sections 4 (Ownership), 5 (No Warranty), 6 (Limitation of Liability), and this Section 
+    7 (Termination), shall continue to apply after termination.
+
+8. Governing Law
+    8.1 Governing Law: This Agreement shall be governed by and construed in accordance with the laws of the 
+    Republic of Korea, without regard to its conflict of laws principles.
+    8.2 Arbitration: Any disputes, controversies, or claims arising out of or relating to this Agreement, 
+    including its existence, validity, interpretation, performance, breach, or termination, shall be referred 
+    to and finally resolved by arbitration administered by the Korean Commercial Arbitration Board (KCAB) in 
+    accordance with the International Arbitration Rules of the Korean Commercial Arbitration Board in force at 
+    the time of the commencement of the arbitration. The seat of arbitration shall be Seoul, Republic of 
+    Korea. The tribunal shall consist of one arbitrator. The language of the arbitration shall be English.
+
+9. Alterations
+    9.1 Modifications: The Licensor reserves the right to modify or amend this Agreement at any time, in its 
+    sole discretion. Any modifications will be effective upon posting the updated Agreement on the Licensor’s 
+    website or through other means of communication. The Licensee is responsible for reviewing the Agreement 
+    periodically for changes. Continued use of the Model after any modifications have been made constitutes 
+    acceptance of the revised Agreement.
+    9.2 Entire Agreement: This Agreement constitutes the entire agreement between the Licensee and Licensor 
+    concerning the subject matter hereof and supersedes all prior or contemporaneous oral or written 
+    agreements, representations, or understandings. Any terms or conditions of any purchase order or other 
+    document submitted by the Licensee in connection with the Model that are in addition to, different from, 
+    or inconsistent with the terms and conditions of this Agreement are not binding on the Licensor and are 
+    void.
+
+By downloading, installing, or using the EXAONE AI Model, the Licensee acknowledges that it has read, 
+understood, and agrees to be bound by the terms and conditions of this Agreement.
--- a/README.md
+++ b/README.md
@@ -0,0 +1,189 @@
+---
+library_name: transformers
+license: other
+license_name: exaone
+license_link: LICENSE
+language:
+  - en
+  - ko
+tags:
+  - lg-ai
+  - exaone
+---
+
+# EXAONE-3.0-7.8B-it
+
+> [!WARNING]
+>
+> https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
+>
+> chat template이 적용 안 될 수 있습니다.
+>
+> llama-cpp-python은 jinja2를 사용하므로 템플릿이 적용되나, 이외의 런타임에서는 적용되지 않을 수 있음에 유의하세요.
+
+
+```py
+from llama_cpp import Llama
+
+llm = Llama.from_pretrained(
+    repo_id="Bingsu/exaone-3.0-7.8b-it",
+    filename="exaone-3.0-7.8B-it-Q8_0.gguf"
+)
+```
+
+```sh
+llama_model_loader: loaded meta data with 34 key-value pairs and 291 tensors from /root/.cache/huggingface/hub/models--Bingsu--exaone-3.0-7.8b-it/snapshots/c7b9c43a7d1db6509b40e9b18f10ae0554b3d4cb/./exaone-3.0-7.8B-it-Q8_0.gguf (version GGUF V3 (latest))
+llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
+llama_model_loader: - kv   0:                       general.architecture str              = llama
+llama_model_loader: - kv   1:                               general.type str              = model
+llama_model_loader: - kv   2:                               general.name str              = Exaone 3.0 7.8b It
+llama_model_loader: - kv   3:                           general.finetune str              = it
+llama_model_loader: - kv   4:                           general.basename str              = exaone-3.0
+llama_model_loader: - kv   5:                         general.size_label str              = 7.8B
+llama_model_loader: - kv   6:                            general.license str              = other
+llama_model_loader: - kv   7:                       general.license.name str              = exaone
+llama_model_loader: - kv   8:                       general.license.link str              = LICENSE
+llama_model_loader: - kv   9:                               general.tags arr[str,2]       = ["lg-ai", "exaone"]
+llama_model_loader: - kv  10:                          general.languages arr[str,2]       = ["en", "ko"]
+llama_model_loader: - kv  11:                          llama.block_count u32              = 32
+llama_model_loader: - kv  12:                       llama.context_length u32              = 4096
+llama_model_loader: - kv  13:                     llama.embedding_length u32              = 4096
+llama_model_loader: - kv  14:                  llama.feed_forward_length u32              = 14336
+llama_model_loader: - kv  15:                 llama.attention.head_count u32              = 32
+llama_model_loader: - kv  16:              llama.attention.head_count_kv u32              = 8
+llama_model_loader: - kv  17:                       llama.rope.freq_base f32              = 500000.000000
+llama_model_loader: - kv  18:     llama.attention.layer_norm_rms_epsilon f32              = 0.000010
+llama_model_loader: - kv  19:                          general.file_type u32              = 7
+llama_model_loader: - kv  20:                           llama.vocab_size u32              = 102400
+llama_model_loader: - kv  21:                 llama.rope.dimension_count u32              = 128
+llama_model_loader: - kv  22:            tokenizer.ggml.add_space_prefix bool             = false
+llama_model_loader: - kv  23:                       tokenizer.ggml.model str              = gpt2
+llama_model_loader: - kv  24:                         tokenizer.ggml.pre str              = default
+llama_model_loader: - kv  25:                      tokenizer.ggml.tokens arr[str,102400]  = ["[PAD]", "[BOS]", "[EOS]", "[UNK]", ...
+llama_model_loader: - kv  26:                  tokenizer.ggml.token_type arr[i32,102400]  = [3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, ...
+llama_model_loader: - kv  27:                      tokenizer.ggml.merges arr[str,101782]  = ["t h", "Ġ a", "Ġ í", "i n", "Ġ t...
+llama_model_loader: - kv  28:                tokenizer.ggml.bos_token_id u32              = 1
+llama_model_loader: - kv  29:                tokenizer.ggml.eos_token_id u32              = 361
+llama_model_loader: - kv  30:            tokenizer.ggml.unknown_token_id u32              = 3
+llama_model_loader: - kv  31:            tokenizer.ggml.padding_token_id u32              = 0
+llama_model_loader: - kv  32:                    tokenizer.chat_template str              = {% for message in messages %}{% if lo...
+llama_model_loader: - kv  33:               general.quantization_version u32              = 2
+llama_model_loader: - type  f32:   65 tensors
+llama_model_loader: - type q8_0:  226 tensors
+llm_load_vocab: special tokens cache size = 362
+llm_load_vocab: token to piece cache size = 0.6622 MB
+llm_load_print_meta: format           = GGUF V3 (latest)
+llm_load_print_meta: arch             = llama
+llm_load_print_meta: vocab type       = BPE
+llm_load_print_meta: n_vocab          = 102400
+llm_load_print_meta: n_merges         = 101782
+llm_load_print_meta: vocab_only       = 0
+llm_load_print_meta: n_ctx_train      = 4096
+llm_load_print_meta: n_embd           = 4096
+llm_load_print_meta: n_layer          = 32
+llm_load_print_meta: n_head           = 32
+llm_load_print_meta: n_head_kv        = 8
+llm_load_print_meta: n_rot            = 128
+llm_load_print_meta: n_swa            = 0
+llm_load_print_meta: n_embd_head_k    = 128
+llm_load_print_meta: n_embd_head_v    = 128
+llm_load_print_meta: n_gqa            = 4
+llm_load_print_meta: n_embd_k_gqa     = 1024
+llm_load_print_meta: n_embd_v_gqa     = 1024
+llm_load_print_meta: f_norm_eps       = 0.0e+00
+llm_load_print_meta: f_norm_rms_eps   = 1.0e-05
+llm_load_print_meta: f_clamp_kqv      = 0.0e+00
+llm_load_print_meta: f_max_alibi_bias = 0.0e+00
+llm_load_print_meta: f_logit_scale    = 0.0e+00
+llm_load_print_meta: n_ff             = 14336
+llm_load_print_meta: n_expert         = 0
+llm_load_print_meta: n_expert_used    = 0
+llm_load_print_meta: causal attn      = 1
+llm_load_print_meta: pooling type     = 0
+llm_load_print_meta: rope type        = 0
+llm_load_print_meta: rope scaling     = linear
+llm_load_print_meta: freq_base_train  = 500000.0
+llm_load_print_meta: freq_scale_train = 1
+llm_load_print_meta: n_ctx_orig_yarn  = 4096
+llm_load_print_meta: rope_finetuned   = unknown
+llm_load_print_meta: ssm_d_conv       = 0
+llm_load_print_meta: ssm_d_inner      = 0
+llm_load_print_meta: ssm_d_state      = 0
+llm_load_print_meta: ssm_dt_rank      = 0
+llm_load_print_meta: model type       = 8B
+llm_load_print_meta: model ftype      = Q8_0
+llm_load_print_meta: model params     = 7.82 B
+llm_load_print_meta: model size       = 7.74 GiB (8.50 BPW) 
+llm_load_print_meta: general.name     = Exaone 3.0 7.8b It
+llm_load_print_meta: BOS token        = 1 '[BOS]'
+llm_load_print_meta: EOS token        = 361 '[|endofturn|]'
+llm_load_print_meta: UNK token        = 3 '[UNK]'
+llm_load_print_meta: PAD token        = 0 '[PAD]'
+llm_load_print_meta: LF token         = 490 'Ä'
+llm_load_print_meta: EOT token        = 42 '<|endoftext|>'
+llm_load_print_meta: max token length = 48
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    yes
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 CUDA devices:
+  Device 0: NVIDIA L4, compute capability 8.9, VMM: yes
+llm_load_tensors: ggml ctx size =    0.14 MiB
+llm_load_tensors: offloading 0 repeating layers to GPU
+llm_load_tensors: offloaded 0/33 layers to GPU
+llm_load_tensors:        CPU buffer size =  7923.02 MiB
+............................................................................................
+llama_new_context_with_model: n_ctx      = 512
+llama_new_context_with_model: n_batch    = 512
+llama_new_context_with_model: n_ubatch   = 512
+llama_new_context_with_model: flash_attn = 0
+llama_new_context_with_model: freq_base  = 500000.0
+llama_new_context_with_model: freq_scale = 1
+llama_kv_cache_init:  CUDA_Host KV buffer size =    64.00 MiB
+llama_new_context_with_model: KV self size  =   64.00 MiB, K (f16):   32.00 MiB, V (f16):   32.00 MiB
+llama_new_context_with_model:  CUDA_Host  output buffer size =     0.39 MiB
+llama_new_context_with_model:      CUDA0 compute buffer size =   633.00 MiB
+llama_new_context_with_model:  CUDA_Host compute buffer size =     9.01 MiB
+llama_new_context_with_model: graph nodes  = 1030
+llama_new_context_with_model: graph splits = 356
+AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | LLAMAFILE = 1 | 
+Model metadata: {'tokenizer.ggml.unknown_token_id': '3', 'tokenizer.ggml.eos_token_id': '361', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'gpt2', 'tokenizer.ggml.add_space_prefix': 'false', 'llama.rope.dimension_count': '128', 'llama.vocab_size': '102400', 'general.file_type': '7', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'llama.rope.freq_base': '500000.000000', 'tokenizer.ggml.bos_token_id': '1', 'llama.attention.head_count': '32', 'general.architecture': 'llama', 'llama.attention.head_count_kv': '8', 'llama.block_count': '32', 'tokenizer.ggml.padding_token_id': '0', 'general.basename': 'exaone-3.0', 'tokenizer.ggml.pre': 'default', 'llama.context_length': '4096', 'general.name': 'Exaone 3.0 7.8b It', 'general.type': 'model', 'general.size_label': '7.8B', 'general.finetune': 'it', 'general.license.name': 'exaone', 'tokenizer.chat_template': "{% for message in messages %}{% if loop.first and message['role'] != 'system' %}{{ '[|system|][|endofturn|]\n' }}{% endif %}{{ '[|' + message['role'] + '|]' + message['content'] }}{% if message['role'] == 'user' %}{{ '\n' }}{% else %}{{ '[|endofturn|]\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[|assistant|]' }}{% endif %}", 'general.license.link': 'LICENSE', 'general.license': 'other', 'llama.feed_forward_length': '14336', 'llama.embedding_length': '4096'}
+Available chat formats from metadata: chat_template.default
+Using gguf chat template: {% for message in messages %}{% if loop.first and message['role'] != 'system' %}{{ '[|system|][|endofturn|]
+' }}{% endif %}{{ '[|' + message['role'] + '|]' + message['content'] }}{% if message['role'] == 'user' %}{{ '
+' }}{% else %}{{ '[|endofturn|]
+' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '[|assistant|]' }}{% endif %}
+Using chat eos_token: [|endofturn|]
+Using chat bos_token: [BOS]
+```
+
+```py
+llm.create_chat_completion(
+      messages = [
+          {
+              "role": "system",
+              "content": "You are EXAONE model from LG AI Research, a helpful assistant."
+        },
+          {
+              "role": "user",
+              "content": "다 해줬잖아"
+          }
+      ]
+)
+```
+
+```sh
+llama_print_timings:        load time =    1812.86 ms
+llama_print_timings:      sample time =      20.39 ms /   220 runs   (    0.09 ms per token, 10788.54 tokens per second)
+llama_print_timings: prompt eval time =    1812.72 ms /    38 tokens (   47.70 ms per token,    20.96 tokens per second)
+llama_print_timings:        eval time =   33280.46 ms /   219 runs   (  151.97 ms per token,     6.58 tokens per second)
+llama_print_timings:       total time =   35397.95 ms /   257 tokens
+{'id': 'chatcmpl-451b0538-c70d-45f4-924b-106f5ac3c02f',
+ 'object': 'chat.completion',
+ 'created': 1723204952,
+ 'model': '/root/.cache/huggingface/hub/models--Bingsu--exaone-3.0-7.8b-it/snapshots/c7b9c43a7d1db6509b40e9b18f10ae0554b3d4cb/./exaone-3.0-7.8B-it-Q8_0.gguf',
+ 'choices': [{'index': 0,
+   'message': {'role': 'assistant',
+    'content': '네, 알겠습니다. 이전에 말씀하신 내용을 요약해 드리겠습니다:\n\n1. EXAONE 2.0 모델의 특징:\n   - 7.8B instruction 튜닝 파라미터\n   - 한국어와 영어에서 우수한 성능\n   - 다양한 작업에서 높은 정확도\n\n2. 연구 논문:\n   - "EXAONE 2.0: An Open-Retrieval Large Language Model for Dense Retrieval and Question Answering"\n\n3. 주요 성과:\n   - 한국어와 영어에서 우수한 성능\n   - 다양한 작업에서 높은 정확도\n\n4. 활용 사례:\n   - 고객 지원 챗봇\n   - 법률 문서 요약\n   - 의료 정보 제공\n\n5. 기술적 세부 사항:\n   - 7.8B instruction 튜닝 파라미터\n   - 한국어와 영어에서 우수한 성능\n   - 다양한 작업에서 높은 정확도\n\n이 외에 추가로 궁금한 사항이 있으시면 언제든지 말씀해 주세요!'},
+   'logprobs': None,
+   'finish_reason': 'stop'}],
+ 'usage': {'prompt_tokens': 38, 'completion_tokens': 219, 'total_tokens': 257}}
+```
--- a/config.json
+++ b/config.json
@@ -0,0 +1,30 @@
+{
+  "_name_or_path": "exaone-3.0-7.8b-it",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 361,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 4096,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.44.0",
+  "use_cache": true,
+  "vocab_size": 102400
+}
--- a/exaone-3.0-7.8B-it-BF16.gguf
+++ b/exaone-3.0-7.8B-it-BF16.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d85a4d98fb3084814c52c248b71362121da8a73e42907e2f10806bf8f1536589
+size 15641630336
--- a/exaone-3.0-7.8B-it-F32.gguf
+++ b/exaone-3.0-7.8B-it-F32.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6adbc1e66cd955bc15c636065fde0b62e32cc52eb2a393331836a04e77c9c359
+size 31277995392
--- a/exaone-3.0-7.8B-it-Q4_K_M.gguf
+++ b/exaone-3.0-7.8B-it-Q4_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3302fec70d81557d3c25cb94ca6803543c8cd3e3af8535ab1a07fc87cdc494f2
+size 4770649472
--- a/exaone-3.0-7.8B-it-Q5_K_M.gguf
+++ b/exaone-3.0-7.8B-it-Q5_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4607a1ff5f0eae57ca02dbf3b65c9c5f441939eda2b4f439f52662316480a67c
+size 5569664384
--- a/exaone-3.0-7.8B-it-Q8_0.gguf
+++ b/exaone-3.0-7.8B-it-Q8_0.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:199b4bc6fb408cd6fdf90bc3348e7a6031d55169995e0026aa8bf85e35919f6c
+size 8312084096
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 361,
+  "pad_token_id": 0,
+  "transformers_version": "4.44.0"
+}
--- a/merges.txt
+++ b/merges.txt
--- a/model-00001-of-00004.safetensors
+++ b/model-00001-of-00004.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72546dc32bda344002327e92ad451c5f3155721f92933106ae298fa022539630
+size 4966213488
--- a/model-00002-of-00004.safetensors
+++ b/model-00002-of-00004.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80ab43f9b3ad6c66e99dfa4bd5669e5bab16a0ef86e24fd301162ab474cd6851
+size 4915916168
--- a/model-00003-of-00004.safetensors
+++ b/model-00003-of-00004.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca25a03b8fb2add6835b8c34523fac06b0138d82357108e8d93959e24579a213
+size 4915941072
--- a/model-00004-of-00004.safetensors
+++ b/model-00004-of-00004.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e797933f1cc7eaad59084281c5f0d74ad9415ed23c8cb3c263656895a3e28160
+size 838860928
--- a/model.safetensors.index.json
+++ b/model.safetensors.index.json
@@ -0,0 +1,298 @@
+{
+  "metadata": {
+    "total_size": 15636897792
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00004-of-00004.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.norm.weight": "model-00003-of-00004.safetensors"
+  }
+}
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "[BOS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[|endofturn|]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
--- a/vocab.json
+++ b/vocab.json