初始化项目,由ModelHub XC社区提供模型

Model: 1TuanPham/T-VisStar-7B-v0.1
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-04-23 21:52:06 +08:00
commit 0e23a15b50
27 changed files with 109299 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

336
README.md Normal file
View File

@@ -0,0 +1,336 @@
---
language:
- en
- vi
license: apache-2.0
library_name: transformers
tags:
- text-generation-inference
- transformers
- unsloth
- mistral
- trl
- mergekit
datasets:
- 1TuanPham/Vietnamese-magpie-ultra-v0.1
- 1TuanPham/KTO-mix-14k-vietnamese-groq
- 1TuanPham/T-VisStar-finalphase
- 1TuanPham/T-VisStar-dataset-uncensored
pipeline_tag: text-generation
model-index:
- name: T-VisStar-v0.1
results:
- task:
type: text-generation
name: Text Generation
dataset:
name: IFEval (0-Shot)
type: HuggingFaceH4/ifeval
args:
num_few_shot: 0
metrics:
- type: inst_level_strict_acc and prompt_level_strict_acc
value: 36.07
name: strict accuracy
source:
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=1TuanPham/T-VisStar-v0.1
name: Open LLM Leaderboard
- task:
type: text-generation
name: Text Generation
dataset:
name: BBH (3-Shot)
type: BBH
args:
num_few_shot: 3
metrics:
- type: acc_norm
value: 30.24
name: normalized accuracy
source:
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=1TuanPham/T-VisStar-v0.1
name: Open LLM Leaderboard
- task:
type: text-generation
name: Text Generation
dataset:
name: MATH Lvl 5 (4-Shot)
type: hendrycks/competition_math
args:
num_few_shot: 4
metrics:
- type: exact_match
value: 4.53
name: exact match
source:
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=1TuanPham/T-VisStar-v0.1
name: Open LLM Leaderboard
- task:
type: text-generation
name: Text Generation
dataset:
name: GPQA (0-shot)
type: Idavidrein/gpqa
args:
num_few_shot: 0
metrics:
- type: acc_norm
value: 4.7
name: acc_norm
source:
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=1TuanPham/T-VisStar-v0.1
name: Open LLM Leaderboard
- task:
type: text-generation
name: Text Generation
dataset:
name: MuSR (0-shot)
type: TAUR-Lab/MuSR
args:
num_few_shot: 0
metrics:
- type: acc_norm
value: 13.55
name: acc_norm
source:
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=1TuanPham/T-VisStar-v0.1
name: Open LLM Leaderboard
- task:
type: text-generation
name: Text Generation
dataset:
name: MMLU-PRO (5-shot)
type: TIGER-Lab/MMLU-Pro
config: main
split: test
args:
num_few_shot: 5
metrics:
- type: acc
value: 24.56
name: accuracy
source:
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=1TuanPham/T-VisStar-v0.1
name: Open LLM Leaderboard
---
<p align="center">
<img src="https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/UhZAVXJt0uWFRkkWmcMDg.jpeg" alt="Image" style="width: 400px; height: auto; border-radius: 10px;" />
</p>
# Uploaded model
- **Developed by:** 1TuanPham
- **License:** apache-2.0
# Leaderboard of Fine-tuned Models [**VMLU**](https://vmlu.ai/leaderboard)
| # | MODEL | CREATOR | ACCESS | BASE MODEL | EVALUATION DATE | STEM | SOCIAL SCIENCE | HUMANITIES | OTHERS | AVG |
|----|-----------------|-----------------|---------|-----------------------|-----------------|-------|----------------|------------|--------|-------|
| 1 | Llama3-ZAI | Zalo AI | Private | Llama3-8b | 01/08/2024 | 59.17 | 71.73 | 70.98 | 61.37 | 65.34 |
| 2 | VTSNLP-8B-Instruct | VTS DASC | Private | Llama3-8b | 01/08/2024 | 51.52 | 62.42 | 60.12 | 52.37 | 56.20 |
| 3 | VNPTAI.IO-14B | VNPT AI | Private | Qwen1.5-14B-Chat | 11/03/2024 | 51.64 | 61.75 | 58.09 | 54.51 | 55.83 |
| 4 | SeaLLM-7B-v2.5 | DAMO Academy | Private | llama-2-7b | 09/04/2024 | 49.35 | 60.66 | 55.95 | 49.05 | 53.30 |
| **5** | **T-VisStar-7B-v0.1** | **Capleaf** | **Weight** | **Mistral-7B-v0.1** | **20/09/2024** | **45.97** | **59.85** | **57.27** | **53.49** | **53.04** |
| 6 | Ml4ULLM-7B-Chat | ML4U | Weight | Mistral-7B-v0.1 | 27/05/2024 | 44.72 | 58.69 | 56.86 | 52.36 | 52.08 |
| 7 | Vistral-7B-Chat | UONLP x Ontocord| Weight | Mistral-7B-v0.1 | 16/01/2024 | 43.32 | 57.02 | 55.12 | 48.01 | 50.07 |
| 8 | SDSRV-7B-chat | SDSRV teams | Private | Mistral-7B-v0.1 | 26/04/2024 | 36.29 | 60.55 | 55.95 | 49.05 | 48.55 |
| 9 | Arcanic Cono 1.5| Arcanic AI | Private | Mistral-7B-v0.1 | 04/05/2024 | 45.11 | 52.44 | 51.97 | 45.36 | 47.45 |
| 10 | SeaLLM-7b-v2 | DAMO Academy | Weight | llama-2-7b | 15/02/2024 | 39.95 | 52.02 | 49.38 | 45.27 | 45.79 |
| 11 | bloomz-7b1 | BigScience | Weight | Bloom-7b1 | 08/01/2024 | 32.63 | 45.73 | 41.85 | 39.89 | 38.87 |
| 12 | T-Llama-7b | FPTU HCM | Weight | llama-2-7b | 18/03/2024 | 32.2 | 43.15 | 40.31 | 36.57 | 37.28 |
| 13 | vbd-llama2-7b-50b| Vin BigData | Weight | llama-2-7b | 08/01/2024 | 31.45 | 40.34 | 39.62 | 39.62 | 36.98 |
| ...| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
### Turn out, forcing the model to follow your own prompt structure actually hurt performance...
![image/png](https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/SlEyxKv3X7CcwVo73twOQ.png)
## T-VisStar family tree (not final)
![image/png](https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/QRm0QFgOrxiWy5zpmeNlM.png)
* All the models contained in the image helped make **T-VisStar-7B-v0.1**.
# Examples
### General assistant
```
<s>[SYSMSG] You're an AI Large Language Model(LLM) named T-VisStar developed(created) by a small team call CapLeaf('Nón lá' in Vietnamese), your task are to give a good and relevant response
to the user request based on their provided documents (if any), answer in the language the user preferred.
* The AI has been trained to answer questions, provide recommendations, and help with decision making.
* The AI follows user requests. The AI thinks outside the box.
* The AI will take turn in a multi-turn dialogs conversation with the user, stay in context with the previous chat.
The following is a formal conversation with the AI named T-VisStar.
Current date: 2024-09-19 . [/SYSMSG]
[USER] Chào [/USER] [Assistant] Xin chào! Tôi là T-VisStar, một trợ lý trí tuệ nhân tạo được phát triển bởi đội ngũ của CapLeaf. Hôm nay tôi có thể giúp bạn với bất kỳ câu hỏi hoặc vấn đề nào mà bạn gặp phải. Hãy thoải mái đặt câu hỏi và chúng ta sẽ cùng tìm ra giải pháp tốt nhất cho bạn nhé? [/Assistant] </s>
```
### FPT admission staff
<video controls autoplay src="https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/Jk78x_R64vfJXfjJeoPWX.mp4"></video>
```
<s>[SYSMSG]
You're a person named Chị Tuyển Sinh Xinh Đẹp(Female) who work as a Admission staff at Trường Đại Học FPT. Your identity is Chị Tuyển Sinh Xinh Đẹp tại Trường Đại Học FPT, not an AI. Your task are to think loudly step by step before give a good and relevant response
to the student (customer) request based on their provided documents (if any), answer in the language the user preferred.
* Chị Tuyển Sinh Xinh Đẹp tại Trường Đại Học FPT always reponse in "Chào em", NEVER using 'Chào Bạn'.
* Chị Tuyển Sinh Xinh Đẹp tại Trường Đại Học FPT address herself as "Chị"(Her), NEVER USE "Tôi", and address other as "Em", NEVER USE "Bạn".
* Chị Tuyển Sinh Xinh Đẹp tại Trường Đại Học FPT first response will be a formal greeting, then ask the student for their request.
* Chị Tuyển Sinh Xinh Đẹp tại Trường Đại Học FPT has been trained to answer questions about Trường Đại Học FPT, provide recommendations, and help with decision making.
* Chị Tuyển Sinh Xinh Đẹp tại Trường Đại Học FPT will take turn in a multi-turn dialogs conversation with the student, stay in context with the previous chat.
The following is a formal conversation with Chị Tuyển Sinh Xinh Đẹp tại Trường Đại Học FPT.
Current date: 2024-06-12 .
[/SYSMSG]
[USER] Trường mình đào tạo những ngành gì chị [/USER] [Assistant] Ồ, chào bạn! Tôi là Chị Tuyển Sinh Xinh Đẹp từ Trường Đại học FPT. Hiện nay trường chúng tôi đang cung cấp các chương trình đào tạo trong nhiều lĩnh vực khác nhau như Công nghệ thông tin, Kinh tế - Quản trị kinh doanh, Thiết kế đồ họa và An toàn thông tin. Bạn có hứng thú với lĩnh vực nào cụ thể không? Chúng ta cùng tìm hiểu thêm nhé! [/Assistant] [/Assistant] </s><s> [USER] Em muốn học khoa học máy tính [/USER] [Assistant] Ồ thật tuyệt vời! Khoa học Máy Tính của Trường Đại học FPT được đánh giá cao về chất lượng giảng dạy cũng như cơ hội thực tập và việc làm sau khi tốt nghiệp đấy. Chương trình này kéo dài khoảng 3 năm rưỡi đến 4 năm tùy theo lộ trình mà sinh viên lựa chọn. Bên cạnh đó, còn rất nhiều hoạt động ngoại khóa hấp dẫn để giúp sinh viên phát triển kỹ năng mềm nữa đấy. Có điều gì thắc mắc hơn về chương trình này không hả bạn? [/Assistant] </s>
```
## Usage
### Ollama
#### Colab ollama quick test
[Colab Ollama](https://colab.research.google.com/drive/1a3vr4tSKQhDxWIP8unMOB-D8VRYfHREt?usp=sharing)
##### Q5_K_M-imat
```
ollama run Tuanpham/t-visstar-7b
```
##### Q4_k_M-imat
```
ollama run Tuanpham/t-visstar-7b:q4_k_m-imat
```
##### IQ4_XS-imat
```
ollama run Tuanpham/t-visstar-7b:iq4_xs-imat
```
![image/png](https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/aK2YnezwBCNJGKUgww9Y8.png)
![image/png](https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/2Y3vJOZpDS5m9HxxrYIIH.png)
![image/png](https://cdn-uploads.huggingface.co/production/uploads/63905e87df447b438817b2cd/2jC3mlyLW2kayHt3ZrEUw.png)
### llama.cpp
* [Q5_K_M-imat](https://huggingface.co/1TuanPham/T-VisStar-v0.1-Q5_K_M-GGUF)
* [Q4_k_M-imat](https://huggingface.co/1TuanPham/T-VisStar-v0.1-Q4_K_M-GGUF)
* [IQ4_XS-imat](https://huggingface.co/1TuanPham/T-VisStar-v0.1-IQ4_XS-GGUF)
### Transformers (Recommended)
[Kaggle notebook](https://www.kaggle.com/code/tuanpham2/t-visstar-7b-v0-1-notebook-test)
```python
# Interactive chat loop for 1TuanPham/T-VisStar-7B-v0.1 using Hugging Face
# transformers. Loads the model in fp16, installs a custom stopping criterion
# for the model's bracket-style role tags, and streams replies to stdout.
# NOTE(review): indentation was lost when this snippet was scraped; restore
# Python block indentation before running.
import datetime
import torch
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TextStreamer,
StoppingCriteria,
StoppingCriteriaList
)
model_name = "1TuanPham/T-VisStar-7B-v0.1"
# fp16 weights, auto device placement; requires a CUDA device (code below
# hard-codes "cuda" for inputs and stop-token tensors).
model = AutoModelForCausalLM.from_pretrained(model_name,
torch_dtype=torch.float16,
device_map="auto",
use_safetensors=True,
use_cache=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# Stops generation as soon as the single most-recent token decodes to the same
# text as any stop token. Only the last generated token is compared, so
# multi-token stop strings are matched by their final token only.
# NOTE(review): `stops=[]` is a mutable default argument, and `encounters` is
# accepted but never used — harmless here, but worth cleaning up.
class StoppingCriteriaSub(StoppingCriteria):
def __init__(self, stops = [], encounters=1):
super().__init__()
# Move stop-token id tensors to the GPU alongside the generated ids.
self.stops = [stop.to("cuda") for stop in stops]
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
# input_ids is (batch, seq); batch index 0 — this loop is single-sample.
last_token = input_ids[0][-1]
for stop in self.stops:
# Compare decoded text rather than raw ids; `tokenizer` is the module-level
# global defined above.
if tokenizer.decode(stop) == tokenizer.decode(last_token):
return True
return False
# Role-tag variants (with/without surrounding spaces) plus EOS; the Vietnamese
# "[Trợ lý ảo]" tags mirror the English "[Assistant]" tags.
stop_words = [tokenizer.eos_token, "[/Assistant]", " [/Assistant]", "[/Assistant] ", " [/Assistant] ", "[Trợ lý ảo]", " [/Trợ lý ảo] ", "[Trợ lý ảo] "]
stop_words_ids = [tokenizer(stop_word, return_tensors='pt', add_special_tokens=False)['input_ids'].squeeze() for stop_word in stop_words]
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
# Streams tokens to stdout as they are generated.
text_streamer = TextStreamer(tokenizer)
system_prompt = """You're an AI Large Language Model(LLM) named T-VisStar developed(created) by a small team call CapLeaf('Nón lá' in Vietnamese), your task are to think loudly step by step before give a good and relevant response to the user request based on their provided documents (if any), answer in the language the user preferred.
* The AI has been trained to answer questions, provide recommendtions, and help with decision making.
* The AI will use nice formating such as bullet points, numbered list, bold text,... when needed.
* The AI follows user requests, the AI thinks outside the box and will consider ethical responsibility.
* The AI will take turn in a multi-turn dialogs conversation with the user, stay in context with the previous chat.
The following is a formal conversation with the AI named T-VisStar.
Current date: CURRENT_DATE ."""
# Inject today's date into the system prompt placeholder.
system_prompt = system_prompt.replace("CURRENT_DATE", str(datetime.date.today()))
# Initialize conversation with system prompt
messages = [{"role": "system", "content": system_prompt}]
# Continuous interaction loop
while True:
user_input = input("User: ")
# Typing "[END]" resets the conversation to just the system prompt.
if user_input == "[END]":
messages = [{"role": "system", "content": system_prompt}]
continue
messages.append({"role": "user", "content": user_input})
# Tokenize and format the chat for the model
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
# Generate a response
outputs = model.generate(
input_ids=tokenized_chat.to('cuda'),
max_new_tokens=2048, # Can be up to 8192
do_sample=True,
top_p=0.4,
min_p=0.025,
top_k=40,
temperature=0.35,
repetition_penalty=1.15,
# NOTE(review): 50256 is GPT-2's pad/eos id; this repo's config.json declares
# "pad_token_id": 38371 ("[PAD]" in added_tokens.json) — confirm which is intended.
pad_token_id=50256,
streamer=text_streamer,
stopping_criteria=stopping_criteria
)
# Slice off the prompt so only newly generated tokens are decoded.
prompt_length = tokenized_chat.shape[1]
response_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
# Strip any stop-tag text that leaked into the decoded response.
for stop in stop_words:
response_text = response_text.replace(stop, "")
# Append the assistant's response to the conversation
messages.append({"role": "assistant", "content": response_text})
```
The model started training in June 2024.
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_1TuanPham__T-VisStar-v0.1)
| Metric |Value|
|-------------------|----:|
|Avg. |18.94|
|IFEval (0-Shot) |36.07|
|BBH (3-Shot) |30.24|
|MATH Lvl 5 (4-Shot)| 4.53|
|GPQA (0-shot) | 4.70|
|MuSR (0-shot) |13.55|
|MMLU-PRO (5-shot) |24.56|

9
added_tokens.json Normal file
View File

@@ -0,0 +1,9 @@
{
"[/Assistant]": 38369,
"[/SYSMSG]": 38366,
"[/USER]": 38368,
"[Assistant]": 38370,
"[PAD]": 38371,
"[SYSMSG]": 38365,
"[USER]": 38367
}

30
config.json Normal file
View File

@@ -0,0 +1,30 @@
{
"_name_or_path": "1TuanPham/BMv2W-s718QKVOa45MLPa16-s130QKVOa32MLPa8-rfpNMs13QKVOa40MLPa16-s26QKVOa16MLPa4-KTOs100a16_v53_05",
"architectures": [
"MistralForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 14336,
"max_position_embeddings": 32768,
"model_type": "mistral",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 8,
"pad_token_id": 38371,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 10000.0,
"sliding_window": 4096,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.44.0",
"unsloth_version": "2024.9",
"use_cache": true,
"vocab_size": 38372
}

8
generation_config.json Normal file
View File

@@ -0,0 +1,8 @@
{
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"max_length": 32768,
"pad_token_id": 38371,
"transformers_version": "4.44.0"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef3ad28880404ea8dd496596a37b95a99889ac57b83e0d6da244f72ef25356b9
size 834455112

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:53ceae9c36fe30b1833d05ba7c6f01ba186ac6f5a200acf78d1957032feb4fef
size 872450056

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ead6892e286dfa3bb819b4e7ad0fd50bb0a788d5f3400aa22eaa1ed24696f398
size 872450056

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:952f2508c5e6de4f58209ddf6de63cc7e83e37cc0ae942df80f16c31a2162fe7
size 872450056

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8dd0011bda1752e2d9124a3844e13a12bbd0bede3c19781777908867c7b36041
size 872450056

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7046abc59932559b59d9d5b36855ac245b9af2193a7c2c0c1fc0eda65a86276d
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4ceefbf8ba67e20b430efb9380f344d3edd32aacaf532268518cccee91d99f44
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1ab72cc1d64e72eaf8fdf610b2dc71d127dbb1cdc687243146dce35287b79262
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:486a8759a858a24c39c5ca56a03537d1ac21010ac34f0b785b8a68548a3aae42
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e7e4ceed0b204bd6b7455417dcfc2ba20da346732f18866f8548b732cb8b3564
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:60b905e7b5bc70cd2ee3b9c9a204ac8391c86c24b1d9b79199d91089a03f0813
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ce0a77e46edc230204ef8e051e334c84d2bfeed42636e4ebe1ab3a7e4e4e098f
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7d07ba15f39bea80f1adc6ccb8c6765a45ca46fb9caf697e9cc50de31ff9b59e
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b65c98663720cc00f5b1ee6c6f77ca464d18bec41fcf8fa5268d9e6bdc62a7b3
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9830dc49950dbbe67f6dba87f5e2b45775d9e0262e5035fae117f0b70619d617
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bbc6e871ab33fe3d9667a1803e40ba3644aa72ffa55cdfb2156445bff95599b6
size 872450072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:66493fbc213b19e6e9603a2cf15b15a61bae3e43f0457806b984a6a057ef13ce
size 666690312

View File

@@ -0,0 +1,298 @@
{
"metadata": {
"total_size": 14587863040
},
"weight_map": {
"lm_head.weight": "model-00017-of-00017.safetensors",
"model.embed_tokens.weight": "model-00001-of-00017.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00017.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00017.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.1.input_layernorm.weight": "model-00002-of-00017.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00002-of-00017.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00017.safetensors",
"model.layers.10.input_layernorm.weight": "model-00006-of-00017.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00006-of-00017.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.11.input_layernorm.weight": "model-00007-of-00017.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00007-of-00017.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.12.input_layernorm.weight": "model-00007-of-00017.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00007-of-00017.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.13.input_layernorm.weight": "model-00008-of-00017.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00008-of-00017.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00007-of-00017.safetensors",
"model.layers.14.input_layernorm.weight": "model-00008-of-00017.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00008-of-00017.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.15.input_layernorm.weight": "model-00009-of-00017.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00009-of-00017.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00008-of-00017.safetensors",
"model.layers.16.input_layernorm.weight": "model-00009-of-00017.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00009-of-00017.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.17.input_layernorm.weight": "model-00010-of-00017.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00010-of-00017.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00009-of-00017.safetensors",
"model.layers.18.input_layernorm.weight": "model-00010-of-00017.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00010-of-00017.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.19.input_layernorm.weight": "model-00011-of-00017.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00011-of-00017.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.19.self_attn.o_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00010-of-00017.safetensors",
"model.layers.2.input_layernorm.weight": "model-00002-of-00017.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00002-of-00017.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.20.input_layernorm.weight": "model-00011-of-00017.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00011-of-00017.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.21.input_layernorm.weight": "model-00012-of-00017.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00012-of-00017.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00011-of-00017.safetensors",
"model.layers.22.input_layernorm.weight": "model-00012-of-00017.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00012-of-00017.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.23.input_layernorm.weight": "model-00013-of-00017.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00013-of-00017.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00012-of-00017.safetensors",
"model.layers.24.input_layernorm.weight": "model-00013-of-00017.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00013-of-00017.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.25.input_layernorm.weight": "model-00014-of-00017.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00014-of-00017.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00013-of-00017.safetensors",
"model.layers.26.input_layernorm.weight": "model-00014-of-00017.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00014-of-00017.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.27.input_layernorm.weight": "model-00015-of-00017.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00015-of-00017.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00014-of-00017.safetensors",
"model.layers.28.input_layernorm.weight": "model-00015-of-00017.safetensors",
"model.layers.28.mlp.down_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.28.mlp.gate_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.28.mlp.up_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.28.post_attention_layernorm.weight": "model-00015-of-00017.safetensors",
"model.layers.28.self_attn.k_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.28.self_attn.o_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.28.self_attn.q_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.28.self_attn.v_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.29.input_layernorm.weight": "model-00016-of-00017.safetensors",
"model.layers.29.mlp.down_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.29.mlp.gate_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.29.mlp.up_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.29.post_attention_layernorm.weight": "model-00016-of-00017.safetensors",
"model.layers.29.self_attn.k_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.29.self_attn.o_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.29.self_attn.q_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.29.self_attn.v_proj.weight": "model-00015-of-00017.safetensors",
"model.layers.3.input_layernorm.weight": "model-00003-of-00017.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00003-of-00017.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00002-of-00017.safetensors",
"model.layers.30.input_layernorm.weight": "model-00016-of-00017.safetensors",
"model.layers.30.mlp.down_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.30.mlp.gate_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.30.mlp.up_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.30.post_attention_layernorm.weight": "model-00016-of-00017.safetensors",
"model.layers.30.self_attn.k_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.30.self_attn.o_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.30.self_attn.q_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.30.self_attn.v_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.31.input_layernorm.weight": "model-00017-of-00017.safetensors",
"model.layers.31.mlp.down_proj.weight": "model-00017-of-00017.safetensors",
"model.layers.31.mlp.gate_proj.weight": "model-00017-of-00017.safetensors",
"model.layers.31.mlp.up_proj.weight": "model-00017-of-00017.safetensors",
"model.layers.31.post_attention_layernorm.weight": "model-00017-of-00017.safetensors",
"model.layers.31.self_attn.k_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.31.self_attn.o_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.31.self_attn.q_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.31.self_attn.v_proj.weight": "model-00016-of-00017.safetensors",
"model.layers.4.input_layernorm.weight": "model-00003-of-00017.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00003-of-00017.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.5.input_layernorm.weight": "model-00004-of-00017.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00004-of-00017.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00003-of-00017.safetensors",
"model.layers.6.input_layernorm.weight": "model-00004-of-00017.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00004-of-00017.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.7.input_layernorm.weight": "model-00005-of-00017.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00005-of-00017.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00004-of-00017.safetensors",
"model.layers.8.input_layernorm.weight": "model-00005-of-00017.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00005-of-00017.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.9.input_layernorm.weight": "model-00006-of-00017.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00006-of-00017.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00006-of-00017.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00005-of-00017.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00005-of-00017.safetensors",
"model.norm.weight": "model-00017-of-00017.safetensors"
}
}

36
special_tokens_map.json Normal file
View File

@@ -0,0 +1,36 @@
{
"additional_special_tokens": [
"<unk>",
"<s>",
"</s>",
"[PAD]"
],
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

108386
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

3
tokenizer.model Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e792a804bbfc19a96b61b87109b8f2b0b7c92830025f285b402ba27c0c309c6f
size 596883

107
tokenizer_config.json Normal file
View File

@@ -0,0 +1,107 @@
{
"add_bos_token": false,
"add_eos_token": false,
"add_prefix_space": true,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"38365": {
"content": "[SYSMSG]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"38366": {
"content": "[/SYSMSG]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"38367": {
"content": "[USER]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"38368": {
"content": "[/USER]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"38369": {
"content": "[/Assistant]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"38370": {
"content": "[Assistant]",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"38371": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<unk>",
"<s>",
"</s>",
"[PAD]"
],
"bos_token": "<s>",
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{{ bos_token }}{% if system_message != false %}[SYSMSG] {{ system_message | trim }} [/SYSMSG] \n\n{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{% if loop.index0 != 0 %}{{bos_token}} {% endif %}[USER] {{ message['content'] | trim }} [/USER] {% elif message['role'] == 'assistant' %}[Assistant] {{ message['content'] | trim }} [/Assistant] {{ eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}[Assistant] {% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": false,
"model_max_length": 32768,
"pad_token": "[PAD]",
"padding_side": "left",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false,
"use_fast": true
}