Adding Evaluation Results (#7)
- Adding Evaluation Results (796f0cfbe24fe87fbde205993ded00360f817e5e) Co-authored-by: Open LLM Leaderboard PR Bot <leaderboard-pr-bot@users.noreply.huggingface.co>
This commit is contained in:
51
.gitattributes
vendored
51
.gitattributes
vendored
@@ -1,47 +1,54 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
||||
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
*.db* filter=lfs diff=lfs merge=lfs -text
|
||||
*.ark* filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.gguf* filter=lfs diff=lfs merge=lfs -text
|
||||
*.ggml filter=lfs diff=lfs merge=lfs -text
|
||||
*.llamafile* filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
model-00001-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00002-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00003-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00004-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00005-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00006-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00007-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00008-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00009-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00010-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00011-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00012-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00013-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00014-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00015-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00016-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00017-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00018-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
model-00019-of-00019.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
149
README.md
149
README.md
@@ -1,47 +1,110 @@
|
||||
---
|
||||
license: Apache License 2.0
|
||||
|
||||
#model-type:
|
||||
##如 gpt、phi、llama、chatglm、baichuan 等
|
||||
#- gpt
|
||||
|
||||
#domain:
|
||||
##如 nlp、cv、audio、multi-modal
|
||||
#- nlp
|
||||
|
||||
#language:
|
||||
##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa
|
||||
#- cn
|
||||
|
||||
#metrics:
|
||||
##如 CIDEr、BLEU、ROUGE 等
|
||||
#- CIDEr
|
||||
|
||||
#tags:
|
||||
##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他
|
||||
#- pretrained
|
||||
|
||||
#tools:
|
||||
##如 vllm、fastchat、llamacpp、AdaSeq 等
|
||||
#- vllm
|
||||
language:
|
||||
- en
|
||||
- de
|
||||
- es
|
||||
- fr
|
||||
- it
|
||||
license: apache-2.0
|
||||
library_name: transformers
|
||||
tags:
|
||||
- dpo
|
||||
- rlaif
|
||||
- preference
|
||||
- ultrafeedback
|
||||
- moe
|
||||
datasets:
|
||||
- argilla/ultrafeedback-binarized-preferences-cleaned
|
||||
base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
pipeline_tag: text-generation
|
||||
model-index:
|
||||
- name: notux-8x7b-v1
|
||||
results: []
|
||||
---
|
||||
### 当前模型的贡献者未提供更加详细的模型介绍。模型文件和权重,可浏览“模型文件”页面获取。
|
||||
#### 您可以通过如下git clone命令,或者ModelScope SDK来下载模型
|
||||
|
||||
SDK下载
|
||||
```bash
|
||||
#安装ModelScope
|
||||
pip install modelscope
|
||||
```
|
||||
```python
|
||||
#SDK模型下载
|
||||
from modelscope import snapshot_download
|
||||
model_dir = snapshot_download('argilla/notux-8x7b-v1')
|
||||
```
|
||||
Git下载
|
||||
```
|
||||
#Git模型下载
|
||||
git clone https://www.modelscope.cn/argilla/notux-8x7b-v1.git
|
||||
```
|
||||
<div align="center">
|
||||
<img src="https://cdn-uploads.huggingface.co/production/uploads/60f0608166e5701b80ed3f02/dj-spsk9eXMMXVGxK6jRz.png" alt="A banner representing Notus, the wind god of the south, in a mythical and artistic style. The banner features a strong, swirling breeze, embodying the warm, wet character of the southern wind. Gracefully flowing across the scene are several paper planes, caught in the gentle yet powerful gusts of Notus. The background is a blend of warm colors, symbolizing the heat of the south, with hints of blue and green to represent the moisture carried by this wind. The overall atmosphere is one of dynamic movement and warmth."/>
|
||||
</div>
|
||||
|
||||
|
||||
# Model Card for Notux 8x7B-v1
|
||||
|
||||
This model is a preference-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on the [argilla/ultrafeedback-binarized-preferences-cleaned](https://huggingface.co/datasets/argilla/ultrafeedback-binarized-preferences-cleaned) dataset using DPO (Direct Preference Optimization).
|
||||
|
||||
As of Dec 26th 2023, it outperforms `Mixtral-8x7B-Instruct-v0.1` and is the top-ranked MoE (Mixture of Experts) model on the [Hugging Face Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
|
||||
|
||||
This is part of the Notus family of models and experiments, where the Argilla team investigates data-first and preference tuning methods like dDPO (distilled DPO). This model is the result of our first experiment at tuning a MoE model that has already been fine-tuned with DPO (i.e., Mixtral-8x7B-Instruct-v0.1).
|
||||
|
||||
## Model Details
|
||||
|
||||
### Model Description
|
||||
|
||||
- **Developed by:** Argilla (based on MistralAI's previous efforts)
|
||||
- **Shared by:** Argilla
|
||||
- **Model type:** Pretrained generative Sparse Mixture of Experts
|
||||
- **Language(s) (NLP):** English, Spanish, Italian, German, and French
|
||||
- **License:** Apache 2.0
|
||||
- **Finetuned from model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)
|
||||
|
||||
### Model Sources
|
||||
|
||||
- **Repository:** https://github.com/argilla-io/notus
|
||||
- **Paper:** N/A
|
||||
|
||||
## Training Details
|
||||
|
||||
### Training Hardware
|
||||
|
||||
We used a VM with 8 x H100 80GB hosted on runpod.io for 1 epoch (~10hr).
|
||||
|
||||
### Training Data
|
||||
|
||||
We used a new iteration of the Argilla UltraFeedback preferences dataset named [argilla/ultrafeedback-binarized-preferences-cleaned](https://huggingface.co/datasets/argilla/ultrafeedback-binarized-preferences-cleaned).
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 5e-07
|
||||
- train_batch_size: 8
|
||||
- eval_batch_size: 4
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 8
|
||||
- total_train_batch_size: 64
|
||||
- total_eval_batch_size: 32
|
||||
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
||||
- lr_scheduler_type: linear
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 1
|
||||
|
||||
### Training results
|
||||
|
||||
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
||||
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
||||
| 0.4384 | 0.22 | 200 | 0.4556 | -0.3275 | -1.9448 | 0.7937 | 1.6174 | -405.7994 | -397.8617 | -1.3157 | -1.4511 |
|
||||
| 0.4064 | 0.43 | 400 | 0.4286 | -0.2163 | -2.2090 | 0.8254 | 1.9927 | -408.4409 | -396.7496 | -0.7660 | -0.6539 |
|
||||
| 0.3952 | 0.65 | 600 | 0.4275 | -0.1311 | -2.1603 | 0.8016 | 2.0291 | -407.9537 | -395.8982 | -0.6783 | -0.7206 |
|
||||
| 0.3909 | 0.87 | 800 | 0.4167 | -0.2273 | -2.3146 | 0.8135 | 2.0872 | -409.4968 | -396.8602 | -0.8458 | -0.7738 |
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.36.0
|
||||
- Pytorch 2.1.0+cu118
|
||||
- Datasets 2.14.6
|
||||
- Tokenizers 0.15.0
|
||||
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
|
||||
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_argilla__notus-8x7b-experiment)
|
||||
|
||||
| Metric |Value|
|
||||
|---------------------------------|----:|
|
||||
|Avg. |73.18|
|
||||
|AI2 Reasoning Challenge (25-Shot)|70.99|
|
||||
|HellaSwag (10-Shot) |87.73|
|
||||
|MMLU (5-Shot) |71.33|
|
||||
|TruthfulQA (0-shot) |65.79|
|
||||
|Winogrande (5-shot) |81.61|
|
||||
|GSM8k (5-shot) |61.64|
|
||||
|
||||
<p style="color: lightgrey;">如果您是本模型的贡献者,我们邀请您根据<a href="https://modelscope.cn/docs/ModelScope%E6%A8%A1%E5%9E%8B%E6%8E%A5%E5%85%A5%E6%B5%81%E7%A8%8B%E6%A6%82%E8%A7%88" style="color: lightgrey; text-decoration: underline;">模型贡献文档</a>,及时完善模型卡片内容。</p>
|
||||
21
all_results.json
Normal file
21
all_results.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"epoch": 1.0,
|
||||
"eval_logits/chosen": -1.2132024765014648,
|
||||
"eval_logits/rejected": -1.2924575805664062,
|
||||
"eval_logps/chosen": -396.52020263671875,
|
||||
"eval_logps/rejected": -409.31964111328125,
|
||||
"eval_loss": 0.4216844439506531,
|
||||
"eval_rewards/accuracies": 0.8134920597076416,
|
||||
"eval_rewards/chosen": -0.1933162659406662,
|
||||
"eval_rewards/margins": 2.1035311222076416,
|
||||
"eval_rewards/rejected": -2.296847343444824,
|
||||
"eval_runtime": 398.9749,
|
||||
"eval_samples": 2000,
|
||||
"eval_samples_per_second": 5.013,
|
||||
"eval_steps_per_second": 0.158,
|
||||
"train_loss": 0.4461688995361328,
|
||||
"train_runtime": 44067.2139,
|
||||
"train_samples": 58917,
|
||||
"train_samples_per_second": 1.337,
|
||||
"train_steps_per_second": 0.021
|
||||
}
|
||||
30
config.json
Normal file
30
config.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"_name_or_path": "mistralai/Mixtral-8x7B-Instruct-v0.1",
|
||||
"architectures": [
|
||||
"MixtralForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 32768,
|
||||
"model_type": "mixtral",
|
||||
"num_attention_heads": 32,
|
||||
"num_experts_per_tok": 2,
|
||||
"num_hidden_layers": 32,
|
||||
"num_key_value_heads": 8,
|
||||
"num_local_experts": 8,
|
||||
"output_router_logits": false,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_theta": 1000000.0,
|
||||
"router_aux_loss_coef": 0.02,
|
||||
"sliding_window": 4096,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.36.0",
|
||||
"use_cache": false,
|
||||
"vocab_size": 32000
|
||||
}
|
||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
||||
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
||||
16
eval_results.json
Normal file
16
eval_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"epoch": 1.0,
|
||||
"eval_logits/chosen": -1.2132024765014648,
|
||||
"eval_logits/rejected": -1.2924575805664062,
|
||||
"eval_logps/chosen": -396.52020263671875,
|
||||
"eval_logps/rejected": -409.31964111328125,
|
||||
"eval_loss": 0.4216844439506531,
|
||||
"eval_rewards/accuracies": 0.8134920597076416,
|
||||
"eval_rewards/chosen": -0.1933162659406662,
|
||||
"eval_rewards/margins": 2.1035311222076416,
|
||||
"eval_rewards/rejected": -2.296847343444824,
|
||||
"eval_runtime": 398.9749,
|
||||
"eval_samples": 2000,
|
||||
"eval_samples_per_second": 5.013,
|
||||
"eval_steps_per_second": 0.158
|
||||
}
|
||||
6
generation_config.json
Normal file
6
generation_config.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"transformers_version": "4.36.0"
|
||||
}
|
||||
3
model-00001-of-00019.safetensors
Normal file
3
model-00001-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:11f2ec2e11347fa0c374b236801fe3d432e30c900905c61483ae9f189dd5808d
|
||||
size 4892809584
|
||||
3
model-00002-of-00019.safetensors
Normal file
3
model-00002-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:49ea4c94bf1abd460b2c09392b946e58e129e1e3a53ed702ce06d13b764bac39
|
||||
size 4983004016
|
||||
3
model-00003-of-00019.safetensors
Normal file
3
model-00003-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5ada743d60cf3c5df04a9dace1bb9fb5fd16c1352dddcfa5e1e8a22b2e8a585f
|
||||
size 4983004016
|
||||
3
model-00004-of-00019.safetensors
Normal file
3
model-00004-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d7496f8eefdda8a98946e32c07b16650ea23186363ccfa6afced76e6c1e15ae5
|
||||
size 4899035200
|
||||
3
model-00005-of-00019.safetensors
Normal file
3
model-00005-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:648bdaa1a215f57aea8cc29e66dc22b9a3d23ddab6ea86bba491622c7bfa9425
|
||||
size 4983004016
|
||||
3
model-00006-of-00019.safetensors
Normal file
3
model-00006-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d9668bb97c9c15c8496bc738549d2ff65c9f408869f8b673a8dc44f7ccf07be0
|
||||
size 4983004016
|
||||
3
model-00007-of-00019.safetensors
Normal file
3
model-00007-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:289504c1cd644785db1253537488ce04828517a4b3fa1dd9c3f3696ac5489d74
|
||||
size 4899035248
|
||||
3
model-00008-of-00019.safetensors
Normal file
3
model-00008-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:723a2545cb4f3b03d282ac0a914b4463f861c2f3a899c3e590227e185a866876
|
||||
size 4983004072
|
||||
3
model-00009-of-00019.safetensors
Normal file
3
model-00009-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bd4b0f8cb00843bae0d0c9c4135039abcdf82d6ce5dc5962176b0fa5e1a2a1b0
|
||||
size 4983004072
|
||||
3
model-00010-of-00019.safetensors
Normal file
3
model-00010-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9ecfb4d51efb4e545d8c5f941e1cff6ddedcc21753a4616012f04e887ff4a97a
|
||||
size 4899035248
|
||||
3
model-00011-of-00019.safetensors
Normal file
3
model-00011-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f2d22020018d58f1daee771c300911bd7d39ab7a1e8341f3386f4dc28b0191d3
|
||||
size 4983004072
|
||||
3
model-00012-of-00019.safetensors
Normal file
3
model-00012-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8a24f06bdb175d79e93e29040b37da98e786c7195675bd2f5da28e07484bdb6a
|
||||
size 4983004072
|
||||
3
model-00013-of-00019.safetensors
Normal file
3
model-00013-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9da22cbd79d759e7d5c0bd61faca98443e18ad77154c777901d1b0ebc68503ae
|
||||
size 4983004072
|
||||
3
model-00014-of-00019.safetensors
Normal file
3
model-00014-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:dfdfac6da5c5f503367f73cc6aed1b86e0968ae0e5cad46cdee1a79bb1c4481e
|
||||
size 4899035248
|
||||
3
model-00015-of-00019.safetensors
Normal file
3
model-00015-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5f583fc17b1c5230b0cfbf5571aa275d044828aa641cbbaf3b3eec800975e8c4
|
||||
size 4983004072
|
||||
3
model-00016-of-00019.safetensors
Normal file
3
model-00016-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3da333874351b8edd12e6a42cdf616c3cae0841ec86f12fa29f5e6c70d1b39b0
|
||||
size 4983004072
|
||||
3
model-00017-of-00019.safetensors
Normal file
3
model-00017-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aba2d6a68b8ff90041579d8ce6713e7487c9f4c31f4e181fb08b35299de7b45b
|
||||
size 4899035248
|
||||
3
model-00018-of-00019.safetensors
Normal file
3
model-00018-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:37f26b990e1ee7a3db9a41c78de21e9edb1e1ee7d46f853d6a1931460d72a38d
|
||||
size 4983004072
|
||||
3
model-00019-of-00019.safetensors
Normal file
3
model-00019-of-00019.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cd4c7992df71e2366b507506231eaa6469fed4517fb912bd846f5b0c9f678195
|
||||
size 4221679088
|
||||
1002
model.safetensors.index.json
Normal file
1002
model.safetensors.index.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6c4d33c44e51ed7df3b9a12fd3c68cfcd710b3a158da80f98b05eca0eaeb1f0b
|
||||
size 7115
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:64bf9363ff96d8377b76288ff1b207d2e46930317a13f5e65a1a771e47cba881
|
||||
size 66756
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:35de71d65957abba8fdb669fab7eb032c995ed33e8badf20a48305a8cc39906f
|
||||
size 828
|
||||
24
special_tokens_map.json
Normal file
24
special_tokens_map.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": "</s>",
|
||||
"unk_token": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
91122
tokenizer.json
Normal file
91122
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
43
tokenizer_config.json
Normal file
43
tokenizer_config.json
Normal file
@@ -0,0 +1,43 @@
|
||||
{
|
||||
"add_bos_token": true,
|
||||
"add_eos_token": false,
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [],
|
||||
"bos_token": "<s>",
|
||||
"chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "</s>",
|
||||
"legacy": true,
|
||||
"model_max_length": 2048,
|
||||
"pad_token": "</s>",
|
||||
"sp_model_kwargs": {},
|
||||
"spaces_between_special_tokens": false,
|
||||
"tokenizer_class": "LlamaTokenizer",
|
||||
"unk_token": "<unk>",
|
||||
"use_default_system_prompt": false
|
||||
}
|
||||
8
train_results.json
Normal file
8
train_results.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"epoch": 1.0,
|
||||
"train_loss": 0.4461688995361328,
|
||||
"train_runtime": 44067.2139,
|
||||
"train_samples": 58917,
|
||||
"train_samples_per_second": 1.337,
|
||||
"train_steps_per_second": 0.021
|
||||
}
|
||||
1396
trainer_state.json
Normal file
1396
trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:123a17f8bc7975953baf4fab8d4f0cdc1554799d1d940dcb0db12f3f3b1fc0a0
|
||||
size 5880
|
||||
Reference in New Issue
Block a user